Skip to content

Commit 8e0e86b

Browse files
authored
Add LazyTensor class to implement ir.TensorProtocol (#2232)
I used Copilot to help implement #2231. The LazyTensor class allows users to delay transformations to the tensors until serialization time, which helps with memory usage and avoids the need to cache or unload intermediate tensor data to disk. Example ```py >>> import numpy as np >>> from onnxscript import ir >>> weights = np.array([[1, 2, 3]]) >>> def create_tensor(): ... # Delay applying transformations to the weights ... weights_t = weights.transpose() ... return ir.tensor(weights_t) >>> lazy_tensor = ir.LazyTensor(create_tensor, dtype=ir.DataType.INT64, shape=ir.Shape([3, 1])) >>> print(lazy_tensor.numpy()) [[1] [2] [3]] >>> print(lazy_tensor.tobytes()) b'\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00' ``` Fixes #2231 --- For more details, open the [Copilot Workspace session](https://copilot-workspace.githubnext.com/microsoft/onnxscript/pull/2232?shareId=b91d512a-8d84-4aca-8545-899243396be5).
1 parent 1d7aea3 commit 8e0e86b

4 files changed

Lines changed: 178 additions & 2 deletions

File tree

docs/ir/ir_api/core.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@
4848
ir.Tensor
4949
ir.ExternalTensor
5050
ir.StringTensor
51+
ir.LazyTensor
5152
```
5253

5354
## Enums

onnxscript/ir/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
"Tensor",
1414
"ExternalTensor",
1515
"StringTensor",
16+
"LazyTensor",
1617
"SymbolicDim",
1718
"Shape",
1819
"TensorType",
@@ -104,6 +105,7 @@
104105
Graph,
105106
GraphView,
106107
Input,
108+
LazyTensor,
107109
Model,
108110
Node,
109111
OptionalType,

onnxscript/ir/_core.py

Lines changed: 142 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
from typing import (
2727
AbstractSet,
2828
Any,
29+
Callable,
2930
Collection,
3031
Generic,
3132
Iterable,
@@ -113,7 +114,7 @@ def _repr_base(self) -> str:
113114
@property
114115
def size(self) -> int:
115116
"""The number of elements in the tensor."""
116-
return np.prod(self.shape.numpy()) # type: ignore[return-value,attr-defined]
117+
return math.prod(self.shape.numpy()) # type: ignore[attr-defined]
117118

118119
@property
119120
def nbytes(self) -> int:
@@ -853,6 +854,145 @@ def meta(self) -> _metadata.MetadataStore:
853854
return self._metadata
854855

855856

857+
class LazyTensor(TensorBase, _protocols.TensorProtocol):  # pylint: disable=too-many-ancestors
    """A tensor that lazily evaluates a function to get the actual tensor.

    This class takes a function returning an `ir.TensorProtocol`, a dtype, and a shape argument.
    The function is lazily evaluated to get the actual tensor when `tobytes()` or `numpy()` is called.

    Example::

        >>> import numpy as np
        >>> from onnxscript import ir
        >>> weights = np.array([[1, 2, 3]])
        >>> def create_tensor():  # Delay applying transformations to the weights
        ...     weights_t = weights.transpose()
        ...     return ir.tensor(weights_t)
        >>> lazy_tensor = ir.LazyTensor(create_tensor, dtype=ir.DataType.INT64, shape=ir.Shape([3, 1]))
        >>> print(lazy_tensor.numpy())
        [[1]
         [2]
         [3]]

    Attributes:
        func: The function that returns the actual tensor.
        dtype: The data type of the tensor.
        shape: The shape of the tensor.
        cache: Whether to cache the result of the function. If False,
            the function is called every time the tensor content is accessed.
            If True, the function is called only once and the result is cached in memory.
            Default is False.
        name: The name of the tensor.
        doc_string: The documentation string.
        metadata_props: The metadata properties.
    """

    __slots__ = (
        "_dtype",
        "_func",
        "_metadata",
        "_metadata_props",
        "_shape",
        "_tensor",
        "cache",
        "doc_string",
        "name",
    )

    def __init__(
        self,
        func: Callable[[], _protocols.TensorProtocol],
        dtype: _enums.DataType,
        shape: Shape,
        *,
        cache: bool = False,
        name: str | None = None,
        doc_string: str | None = None,
        metadata_props: dict[str, str] | None = None,
    ) -> None:
        """Initialize a lazy tensor.

        Args:
            func: The function that returns the actual tensor.
            dtype: The data type of the tensor.
            shape: The shape of the tensor.
            cache: Whether to cache the result of the function.
            name: The name of the tensor.
            doc_string: The documentation string.
            metadata_props: The metadata properties.
        """
        self._func = func
        self._dtype = dtype
        self._shape = shape
        # Holds the evaluated tensor once computed; only populated when cache=True.
        self._tensor: _protocols.TensorProtocol | None = None
        self.cache = cache
        self.name = name
        self.doc_string = doc_string
        self._metadata: _metadata.MetadataStore | None = None
        self._metadata_props = metadata_props

    def _evaluate(self) -> _protocols.TensorProtocol:
        """Evaluate the function to get the actual tensor."""
        if not self.cache:
            return self._func()

        # Cache the tensor: call the function at most once and reuse the result.
        if self._tensor is None:
            self._tensor = self._func()
        return self._tensor

    def __array__(self, dtype: Any = None) -> np.ndarray:
        return self._evaluate().__array__(dtype)

    def __dlpack__(self, *, stream: Any = None) -> Any:
        return self._evaluate().__dlpack__(stream=stream)

    def __dlpack_device__(self) -> tuple[int, int]:
        return self._evaluate().__dlpack_device__()

    def __repr__(self) -> str:
        return f"{self._repr_base()}(func={self._func!r}, name={self.name!r})"

    @property
    def raw(self) -> Callable[[], _protocols.TensorProtocol]:
        # Expose the underlying factory function as the "raw" payload.
        return self._func

    @property
    def dtype(self) -> _enums.DataType:
        """The data type of the tensor. Immutable."""
        return self._dtype

    @property
    def shape(self) -> Shape:
        """The shape of the tensor. Immutable."""
        return self._shape

    def numpy(self) -> np.ndarray:
        """Return the tensor as a numpy array."""
        return self._evaluate().numpy()

    def tobytes(self) -> bytes:
        """Return the bytes of the tensor."""
        return self._evaluate().tobytes()

    @property
    def metadata_props(self) -> dict[str, str]:
        if self._metadata_props is None:
            self._metadata_props = {}
        return self._metadata_props

    @property
    def meta(self) -> _metadata.MetadataStore:
        """The metadata store for intermediate analysis.

        Write to the :attr:`metadata_props` if you would like the metadata to be serialized
        to the ONNX proto.
        """
        if self._metadata is None:
            self._metadata = _metadata.MetadataStore()
        return self._metadata
995+
856996
class SymbolicDim(_protocols.SymbolicDimProtocol, _display.PrettyPrintable):
857997
__slots__ = ("_value",)
858998

@@ -2183,7 +2323,7 @@ def sort(self) -> None:
21832323
sorted_nodes_by_graph: dict[Graph, list[Node]] = {
21842324
graph: [] for graph in {node.graph for node in nodes if node.graph is not None}
21852325
}
2186-
# TODO: Explain why we need to store direct predecessors and children and why
2326+
# TODO(justinchuby): Explain why we need to store direct predecessors and children and why
21872327
# we only need to store the direct ones
21882328

21892329
# The depth of a node is defined as the number of direct children it has

onnxscript/ir/_core_test.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1312,5 +1312,38 @@ def test_as_graphs(self):
13121312
self.assertIsInstance(attr.as_graphs()[0], _core.Graph)
13131313

13141314

1315+
class LazyTensorTest(unittest.TestCase):
    """Tests for _core.LazyTensor."""

    def _tensor_fn(self):
        # Shared factory producing the INT64 tensor [1, 2, 3] used by all tests.
        return ir.tensor([1, 2, 3], dtype=ir.DataType.INT64)

    def test_lazy_tensor_initialization(self):
        lazy_tensor = _core.LazyTensor(
            self._tensor_fn, dtype=ir.DataType.INT64, shape=ir.Shape((3,))
        )
        self.assertEqual(lazy_tensor.dtype, ir.DataType.INT64)
        self.assertEqual(lazy_tensor.shape, (3,))

    def test_lazy_tensor_numpy(self):
        lazy_tensor = _core.LazyTensor(
            self._tensor_fn, dtype=ir.DataType.INT64, shape=ir.Shape((3,))
        )
        np.testing.assert_array_equal(lazy_tensor.numpy(), np.array([1, 2, 3]))

    def test_lazy_tensor_tobytes(self):
        lazy_tensor = _core.LazyTensor(
            self._tensor_fn, dtype=ir.DataType.INT64, shape=ir.Shape((3,))
        )
        self.assertEqual(
            lazy_tensor.tobytes(),
            b"\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00",
        )

    def test_lazy_tensor_with_cache_evaluates_function_only_once(self):
        # cache=True is the one behavior branch in LazyTensor._evaluate;
        # verify the factory is invoked exactly once across repeated accesses.
        call_count = 0

        def tensor_fn():
            nonlocal call_count
            call_count += 1
            return ir.tensor([1, 2, 3], dtype=ir.DataType.INT64)

        lazy_tensor = _core.LazyTensor(
            tensor_fn, dtype=ir.DataType.INT64, shape=ir.Shape((3,)), cache=True
        )
        np.testing.assert_array_equal(lazy_tensor.numpy(), np.array([1, 2, 3]))
        lazy_tensor.tobytes()
        self.assertEqual(call_count, 1)
1347+
13151348
if __name__ == "__main__":
13161349
unittest.main()

0 commit comments

Comments
 (0)