|
| 1 | +# SPDX-License-Identifier: Apache-2.0 |
| 2 | +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project |
| 3 | +import pytest |
| 4 | +import torch |
| 5 | +from torch import nn |
| 6 | + |
| 7 | +import vllm.kernels # noqa: F401 to register kernels |
| 8 | +from vllm import ir |
| 9 | +from vllm.compilation.passes.ir.lowering_pass import ( |
| 10 | + VllmIRLoweringPass, |
| 11 | +) |
| 12 | +from vllm.config import get_current_vllm_config |
| 13 | +from vllm.ir import ops |
| 14 | +from vllm.platforms import current_platform |
| 15 | + |
| 16 | +from ...backend import TestBackend |
| 17 | + |
| 18 | + |
class Model(nn.Module):
    """Toy module exercising three ``ops.rms_norm`` call sites.

    The three calls are deliberately different so the lowering pass has to
    make a distinct dispatch decision for each one:
      1. rms_norm with a weight tensor,
      2. rms_norm without a weight (``None``),
      3. rms_norm with an explicit ``variance_size`` argument.
    Elementwise ops are interleaved between them so each call site stays a
    separate node in the captured graph.
    """

    def __init__(self, hidden_size=16, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.hidden_size = hidden_size
        # Unit weight keeps the weighted norm numerically trivial to compare.
        self.weight = torch.ones(hidden_size, dtype=torch.bfloat16)

    def forward(self, x):
        shifted = x + 4.0
        normed = ops.rms_norm(shifted, self.weight, 1e-5)
        scaled = normed * 5.0
        # no weight
        unweighted = ops.rms_norm(scaled, None, 1e-5)
        halved = unweighted / 2.0
        # dispatch to native due to variance_size parameter
        partial = ops.rms_norm(halved, self.weight, 1e-5, self.hidden_size // 2)
        return partial + 3.0
| 35 | + |
| 36 | + |
@pytest.mark.parametrize("rms_provider", ops.rms_norm.supported_providers())
def test_lowering_rms_norm(rms_provider, default_vllm_config):
    """Lowered and unlowered compilations of Model must agree numerically,
    and the lowering pass must record the expected provider per call site."""
    torch.set_default_device(current_platform.device_type)

    pass_under_test = VllmIRLoweringPass(get_current_vllm_config())
    lowered_backend = TestBackend(pass_under_test)
    reference_backend = TestBackend()

    model = Model()
    inp = torch.randn(8, 16, dtype=torch.bfloat16)
    with (
        ops.rms_norm.set_priority([rms_provider, "native"]),
        ir.enable_torch_wrap(True),
    ):
        lowered_fn = torch.compile(model, backend=lowered_backend, fullgraph=True)
        reference_fn = torch.compile(
            model, backend=reference_backend, fullgraph=True
        )
        lowered_out = lowered_fn(inp)
        reference_out = reference_fn(inp)

    # Three rms_norm sites in Model: the first two should dispatch to the
    # requested provider, the variance_size one falls back to "native".
    chosen = pass_under_test.selected_impls["rms_norm"]
    assert len(chosen) == 3
    assert chosen["rms_norm"] == rms_provider
    assert chosen["rms_norm_1"] == rms_provider
    assert chosen["rms_norm_2"] == "native"

    # Compiled function guards on global value, avoid recompilation
    with ir.enable_torch_wrap(True):
        lowered_out_again = lowered_fn(inp)

    torch.testing.assert_close(reference_out, lowered_out)
    torch.testing.assert_close(reference_out, lowered_out_again)
0 commit comments