Skip to content

Commit fdaa406

Browse files
Copilot authored and justinchuby committed
Fix infinite loop in constant folding of Resize: pre-evaluate output size check
Co-authored-by: justinchuby <11205048+justinchuby@users.noreply.github.com>
1 parent 2792978 commit fdaa406

2 files changed

Lines changed: 57 additions & 0 deletions

File tree

onnxscript/optimizer/_constant_folding.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1280,6 +1280,31 @@ def process_node(self, node: ir.Node, is_function: bool) -> Replacement | None:
12801280
node.name,
12811281
)
12821282

1283+
# Check estimated output size before calling the (potentially expensive) reference
1284+
# evaluator. This avoids hanging on ops like Resize with large output tensors where
1285+
# the reference implementation may be very slow.
1286+
for output in node.outputs:
1287+
if output.shape is not None and output.shape.is_static():
1288+
try:
1289+
shape_tuple = output.shape.numpy()
1290+
estimated_size = math.prod(shape_tuple)
1291+
if estimated_size > self.output_size_limit:
1292+
logger.info(
1293+
"Skipping constant folding for node %r due to large estimated "
1294+
"output size: %s > output_size_limit=%s",
1295+
node.name,
1296+
estimated_size,
1297+
self.output_size_limit,
1298+
)
1299+
return None
1300+
except Exception as e: # pylint: disable=broad-exception-caught
1301+
logger.debug(
1302+
"Could not estimate output size for node %r output '%s': %s",
1303+
node.name,
1304+
output.name,
1305+
e,
1306+
)
1307+
12831308
input_values = [_get_numpy_value(x) for x in node.inputs]
12841309

12851310
def convert(av):

onnxscript/optimizer/_constant_folding_test.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -741,6 +741,38 @@ def test_constant_folding_creates_constant_nodes_in_function(self):
741741
constant_nodes = [n for n in func.graph if n.op_type == "Constant"]
742742
self.assertEqual(len(constant_nodes), 1)
743743

744+
def test_output_size_limit_prevents_evaluation_before_running(self):
    """Test that output_size_limit is checked before calling the (expensive) reference
    evaluator.

    This is a regression test for https://github.com/microsoft/onnxscript/issues/2709
    where Resize with cubic mode on a large output tensor caused an infinite loop/hang
    because the reference evaluator was called before checking output size.
    """
    # Create a model with Resize that has a small constant input but large output
    # sizes: [1, 2, 64, 64] -> output has 8192 elements, exceeding a small
    # output_size_limit.
    model_text = """
        <ir_version: 8, opset_import: [ "" : 18]>
        agraph () => (float[1, 2, 64, 64] output)
        <float[1, 2, 4, 4] x_const = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0,
                                      9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
                                      1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0,
                                      9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}>
        {
            sizes = Constant <value_ints=[1, 2, 64, 64]> ()
            output = Resize <mode="nearest"> (x_const, , , sizes)
        }
    """
    model = ir.from_onnx_text(model_text)
    # With a small output_size_limit, the Resize node should NOT be folded
    # (and more importantly, should NOT hang).
    optimized = self._fold(
        model,
        onnx_shape_inference=True,
        output_size_limit=100,  # very small limit to prevent evaluation
    )
    # The Resize node should remain (not folded).
    ops = [node.op_type for node in optimized.graph]
    self.assertIn("Resize", ops)
775+
744776
def test_initializer_as_graph_output_is_not_removed(self):
745777
"""Test that an initializer that is a graph output is not removed during constant folding."""
746778
model = """

0 commit comments

Comments
 (0)