Skip to content

Commit 803b65d

Browse files
authored
Misc fixes to prepare for vllm v0.13.0 version bump (#739)
1 parent 937310a commit 803b65d

File tree

2 files changed

+18
-7
lines changed

2 files changed

+18
-7
lines changed

benchmarks/generator/throughput.py

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@
6161
from forge.types import LauncherConfig, ProvisionerConfig
6262
from forge.util.config import parse
6363
from omegaconf import DictConfig
64+
from tqdm import tqdm
6465
from vllm import __version__ as vllm_version
6566

6667
if vllm_version >= "0.13.0":
@@ -142,12 +143,25 @@ async def run_throughput_benchmark(
142143
prompts = [req.prompt for req in requests]
143144
request_ids = [req.request_id for req in requests]
144145

145-
start = time.perf_counter()
146146
# TODO: here we're measuring two things together: compute (vllm) and io (monarch).
147147
# We shall consider finer grained metrics collection to distinguish the two.
148-
completions = await asyncio.gather(
149-
*[generator.generate.route(prompt) for prompt in prompts]
150-
)
148+
start = time.perf_counter()
149+
# Create one task per prompt; the list keeps them in the original prompt order
150+
tasks = [
151+
asyncio.create_task(generator.generate.route(prompt)) for prompt in prompts
152+
]
153+
with tqdm(
154+
total=len(tasks),
155+
desc="Processing requests",
156+
unit="req",
157+
smoothing=0, # Show the overall average rate (tqdm: 0 = average, 1 = instantaneous)
158+
) as pbar:
159+
for coro in asyncio.as_completed(tasks):
160+
await coro
161+
pbar.update(1)
162+
163+
# Gather results in original order
164+
completions = [task.result() for task in tasks]
151165
end = time.perf_counter()
152166

153167
elapsed_time = end - start

tests/unit_tests/test_generator_config.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,6 @@ def test_generator_default_initialization(self):
4545
self.assertEqual(generator.engine_args.tensor_parallel_size, 1)
4646
self.assertEqual(generator.engine_args.pipeline_parallel_size, 1)
4747
self.assertFalse(generator.engine_args.enforce_eager)
48-
self.assertTrue(generator.engine_args._is_v1_supported_oracle())
4948

5049
# Sampling defaults
5150
self.assertEqual(generator.sampling_params.n, 1)
@@ -90,7 +89,6 @@ def test_generator_with_dict_configs(self):
9089
self.assertEqual(generator.engine_args.gpu_memory_utilization, 0.1)
9190
self.assertEqual(generator.engine_args.max_model_len, 1024)
9291
self.assertTrue(generator.engine_args.enforce_eager)
93-
self.assertTrue(generator.engine_args._is_v1_supported_oracle())
9492

9593
self.assertEqual(generator.sampling_params.n, 2)
9694
self.assertEqual(generator.sampling_params.max_tokens, 32)
@@ -127,7 +125,6 @@ def test_generator_yaml_config_loading(self):
127125
self.assertEqual(generator.engine_args.tensor_parallel_size, 1)
128126
self.assertEqual(generator.engine_args.pipeline_parallel_size, 1)
129127
self.assertTrue(generator.engine_args.enforce_eager)
130-
self.assertTrue(generator.engine_args._is_v1_supported_oracle())
131128

132129
self.assertEqual(generator.sampling_params.n, 2)
133130
self.assertEqual(generator.sampling_params.max_tokens, 32)

0 commit comments

Comments
 (0)