Skip to content

Commit af97295

Browse files
committed
feat: add fields provided by OpenRouter to stats.json
1 parent c6b7cdc commit af97295

1 file changed

Lines changed: 103 additions & 0 deletions

File tree

src/balatrollm/data_collection.py

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,16 @@ class RunStats:
8383
total_reasoning_tokens: Total reasoning tokens across all calls.
8484
total_tokens: Total tokens across all calls.
8585
total_response_time_ms: Total response time in milliseconds.
86+
total_cost: Total cost across all LLM calls.
87+
avg_cost_per_call: Average cost per LLM call that reported a cost.
88+
total_upstream_inference_cost: Total upstream inference cost.
89+
total_upstream_prompt_cost: Total upstream prompt cost.
90+
total_upstream_completion_cost: Total upstream completion cost.
91+
providers_used: List of unique providers used during the run.
92+
reasoning_calls: Number of calls that included reasoning content.
93+
avg_reasoning_content_length: Average reasoning content length across calls that included reasoning content.
94+
total_reasoning_content_length: Total length of all reasoning content.
95+
request_ids: List of request IDs for tracing and debugging.
8696
"""
8797

8898
# Game Performance
@@ -114,6 +124,24 @@ class RunStats:
114124
total_tokens: int = 0
115125
total_response_time_ms: float = 0.0
116126

127+
# Cost Tracking
128+
total_cost: float = 0.0
129+
avg_cost_per_call: float = 0.0
130+
total_upstream_inference_cost: float = 0.0
131+
total_upstream_prompt_cost: float = 0.0
132+
total_upstream_completion_cost: float = 0.0
133+
134+
# Provider Tracking
135+
providers_used: list[str] = field(default_factory=list)
136+
137+
# Reasoning Analysis
138+
reasoning_calls: int = 0
139+
avg_reasoning_content_length: float = 0.0
140+
total_reasoning_content_length: int = 0
141+
142+
# Request Tracking
143+
request_ids: list[str] = field(default_factory=list)
144+
117145

118146
@dataclass
119147
class RunStatsCollector:
@@ -361,6 +389,12 @@ def calculate_stats(self) -> RunStats:
361389
output_tokens = []
362390
reasoning_tokens = []
363391
total_tokens = []
392+
costs = []
393+
upstream_inference_costs = []
394+
upstream_prompt_costs = []
395+
upstream_completion_costs = []
396+
reasoning_content_lengths = []
397+
providers = []
364398

365399
with open(responses_path, "r") as f:
366400
for line in f:
@@ -374,14 +408,68 @@ def calculate_stats(self) -> RunStats:
374408
message = body.get("choices", [{}])[0].get("message", {})
375409
usage = body.get("usage", {})
376410

411+
# Token tracking
377412
if "prompt_tokens" in usage:
378413
input_tokens.append(usage["prompt_tokens"])
379414
if "completion_tokens" in usage:
380415
output_tokens.append(usage["completion_tokens"])
381416
if "reasoning_tokens" in usage:
382417
reasoning_tokens.append(usage["reasoning_tokens"])
418+
elif usage.get("completion_tokens_details", {}).get(
419+
"reasoning_tokens"
420+
):
421+
reasoning_tokens.append(
422+
usage["completion_tokens_details"]["reasoning_tokens"]
423+
)
383424
if "total_tokens" in usage:
384425
total_tokens.append(usage["total_tokens"])
426+
427+
# Cost tracking
428+
if "cost" in usage and usage["cost"] is not None:
429+
costs.append(usage["cost"])
430+
cost_details = usage.get("cost_details", {})
431+
if (
432+
"upstream_inference_cost" in cost_details
433+
and cost_details["upstream_inference_cost"] is not None
434+
):
435+
upstream_inference_costs.append(
436+
cost_details["upstream_inference_cost"]
437+
)
438+
if (
439+
"upstream_inference_prompt_cost" in cost_details
440+
and cost_details["upstream_inference_prompt_cost"]
441+
is not None
442+
):
443+
upstream_prompt_costs.append(
444+
cost_details["upstream_inference_prompt_cost"]
445+
)
446+
if (
447+
"upstream_inference_completions_cost" in cost_details
448+
and cost_details["upstream_inference_completions_cost"]
449+
is not None
450+
):
451+
upstream_completion_costs.append(
452+
cost_details["upstream_inference_completions_cost"]
453+
)
454+
455+
# Provider tracking
456+
if "provider" in body:
457+
provider = body["provider"]
458+
providers.append(provider)
459+
if provider not in stats.providers_used:
460+
stats.providers_used.append(provider)
461+
462+
# Request ID tracking
463+
request_id = response.get("response", {}).get("request_id")
464+
if request_id:
465+
stats.request_ids.append(request_id)
466+
467+
# Reasoning content analysis
468+
reasoning_content = message.get("reasoning_content", "")
469+
if reasoning_content:
470+
stats.reasoning_calls += 1
471+
reasoning_content_lengths.append(len(reasoning_content))
472+
385473
if message.get("tool_calls") is None:
386474
stats.invalid_responses += 1
387475

@@ -407,4 +495,19 @@ def calculate_stats(self) -> RunStats:
407495
sum(total_tokens) / len(total_tokens) if total_tokens else 0.0
408496
)
409497

498+
# Calculate cost totals and averages
499+
stats.total_cost = sum(costs)
500+
stats.avg_cost_per_call = sum(costs) / len(costs) if costs else 0.0
501+
stats.total_upstream_inference_cost = sum(upstream_inference_costs)
502+
stats.total_upstream_prompt_cost = sum(upstream_prompt_costs)
503+
stats.total_upstream_completion_cost = sum(upstream_completion_costs)
504+
505+
# Calculate reasoning content averages
506+
stats.total_reasoning_content_length = sum(reasoning_content_lengths)
507+
stats.avg_reasoning_content_length = (
508+
sum(reasoning_content_lengths) / len(reasoning_content_lengths)
509+
if reasoning_content_lengths
510+
else 0.0
511+
)
512+
410513
return stats

0 commit comments

Comments
 (0)