Skip to content

Commit 66b3022

Browse files
author
Wojciech Napierała
committed
Enhance automated review temperature adjustment for OpenAI O-series models
- Updated the ReviewEngine to dynamically adjust the sampling temperature for O-series models based on API constraints.
- Added a new method to resolve the appropriate temperature based on the model name.
- Included a test to verify that the correct temperature is set for O-series models.
- Updated the CHANGELOG to document this enhancement.
1 parent 9e3a777 commit 66b3022

3 files changed

Lines changed: 64 additions & 1 deletion

File tree

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ All notable changes to this project will be documented in this file. The format
3333

3434
### Changed
3535
- Startup health checks now warn (rather than exit) when Redis or ChromaDB are unavailable, enabling seamless in-memory fallbacks.
36+
- Automated review automatically adjusts sampling temperature for OpenAI O-series models to satisfy API constraints.
3637
## [0.1.1] - 2025-11-07
3738

3839
### Added

core/review.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
v0.5 - 2025-11-07 - Normalised metadata serialisation for automated review payloads.
99
v0.6 - 2025-11-07 - Applied provider-aware routing for automated review models.
1010
v0.7 - 2025-11-07 - Logged automated review failures before surfacing to callers.
11+
v0.8 - 2025-11-07 - Adjusted reviewer temperature for O-series OpenAI models.
1112
"""
1213

1314
from __future__ import annotations
@@ -113,6 +114,7 @@ def _run_automated_review(
113114
try:
114115
self._logger.debug("Running automated review with model %s", model)
115116
model_name, provider_kwargs = self._resolve_model_configuration()
117+
sampling_temperature = self._resolve_temperature(model_name)
116118
payload = {
117119
"task_prompt": request.prompt,
118120
"workflow": request.workflow,
@@ -138,7 +140,7 @@ def _run_automated_review(
138140
),
139141
},
140142
],
141-
temperature=0.0,
143+
temperature=sampling_temperature,
142144
request_timeout=self._config.llm.timeouts.request_seconds,
143145
**provider_kwargs,
144146
)
@@ -193,6 +195,15 @@ def _normalise_verdict(raw_verdict: Optional[str]) -> str:
193195
return "fail-auto"
194196
return verdict
195197

198+
@staticmethod
def _resolve_temperature(model_name: str) -> float:
    """Return the sampling temperature to use for the given reviewer model.

    OpenAI O-series reasoning models (o1, o3, o4, including dated and
    "-mini"/"-pro" variants) reject any temperature other than the default,
    so they must be called with 1.0; every other model keeps the
    deterministic 0.0 used for automated review.

    Args:
        model_name: Reviewer model identifier, possibly prefixed with a
            provider route (e.g. "openai/o3-mini").

    Returns:
        1.0 for O-series models, 0.0 otherwise.
    """
    normalized = model_name.lower()
    # Drop any provider/route prefix: "openai/o3-mini" -> "o3-mini".
    short_name = normalized.split("/")[-1]
    # O-series names are "o" followed by a digit ("o1", "o3-mini", "o4-...")
    # or the "o-" prefix. A bare startswith("o") — as previously used —
    # also matched unrelated models such as "omni-moderation-latest".
    is_o_series = (
        len(short_name) >= 2
        and short_name[0] == "o"
        and (short_name[1].isdigit() or short_name[1] == "-")
    )
    return 1.0 if is_o_series else 0.0
206+
196207
def _activate_litellm_debug(self) -> None:
197208
"""Enable LiteLLM debug logging for automated review when configured."""
198209
if not self._config.llm.enable_debug:

tests/test_review_engine.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
from config import settings
1616
from core.live_loop import LiveTaskLoop
1717
from core.review import ReviewEngine
18+
from models.workflows import TaskRequest, TaskResult
1819

1920

2021
def _entry_has_user_task(entry: Dict[str, object], expected: str) -> bool:
@@ -186,3 +187,53 @@ def test_resolve_model_configuration_uses_azure_provider(monkeypatch: pytest.Mon
186187
assert model_name == "azure/gpt-4.1"
187188
assert kwargs["custom_llm_provider"] == "azure"
188189
assert kwargs["api_base"] == "https://example.openai.azure.com"
190+
191+
192+
def test_review_engine_sets_o_series_temperature(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
    """Verify the reviewer sends temperature 1.0 when the model is O-series.

    Configures the engine with "o3-mini", stubs out litellm, and checks the
    temperature that reaches the completion call.
    """
    # Point the reviewer at an O-series model; clearing the provider lets
    # the engine fall back to its default provider resolution.
    config = _load_sample_config(tmp_path)
    config.review.auto_reviewer_model = "o3-mini"
    config.review.auto_reviewer_provider = None

    # Records the keyword arguments observed by the fake completion call.
    captured: Dict[str, Any] = {}

    def _fake_completion(*args: Any, temperature: float, **kwargs: Any) -> Dict[str, Any]:
        # Capture the temperature the engine chose, then return a minimal
        # well-formed review payload so perform_review can parse a verdict.
        captured["temperature"] = temperature
        return {
            "choices": [
                {
                    "message": {
                        "content": (
                            "VERDICT: PASS\n"
                            "REASONING: Compliant.\n"
                            "QUALITY_SCORE: 0.9\n"
                            "SUGGESTIONS:\n"
                            "- None."
                        )
                    }
                }
            ],
            "usage": {},
        }

    class DummyTimeout(Exception):
        """Placeholder timeout exception."""

    # Replace the litellm module used by core.review with a stub exposing
    # only the attributes the engine touches (completion and Timeout).
    dummy_litellm = SimpleNamespace(
        completion=_fake_completion,
        Timeout=DummyTimeout,
    )
    monkeypatch.setattr("core.review.litellm", dummy_litellm)

    engine = ReviewEngine(config)
    # Bypass real provider routing so the engine sees the bare model name
    # with no extra provider kwargs.
    monkeypatch.setattr(
        engine,
        "_resolve_model_configuration",
        lambda: ("o3-mini", {}),
    )

    request = TaskRequest(workflow="fast", prompt="demo")
    result = TaskResult(workflow="fast", content="ok", latency_seconds=0.1)

    engine.perform_review(request, result)

    # O-series models must be called with the API-mandated temperature 1.0.
    assert captured.get("temperature") == pytest.approx(1.0)

0 commit comments

Comments
 (0)