|
1 | 1 | """AgentMixin: agent execution, output parsing, rate limit handling.""" |
2 | 2 | from __future__ import annotations |
3 | 3 |
|
| 4 | +import json |
4 | 5 | import os |
5 | 6 | import re |
6 | 7 | import signal |
|
11 | 12 | from datetime import datetime, timedelta |
12 | 13 | from pathlib import Path |
13 | 14 |
|
| 15 | + |
| 16 | +def _parse_claude_json(stdout: str) -> dict | None: |
| 17 | + """Parse output of `claude --output-format json`. Returns None on any failure. |
| 18 | +
|
| 19 | + Tolerates trailing whitespace and the rare case where stdout has leading |
| 20 | + non-JSON debug output by scanning for the final result-shaped object. |
| 21 | + Never raises — callers fall back to treating stdout as plain text. |
| 22 | + """ |
| 23 | + text = (stdout or "").strip() |
| 24 | + if not text: |
| 25 | + return None |
| 26 | + try: |
| 27 | + return json.loads(text) |
| 28 | + except json.JSONDecodeError: |
| 29 | + # Last-resort: locate the final result envelope |
| 30 | + marker = '{"type":"result"' |
| 31 | + start = text.rfind(marker) |
| 32 | + if start == -1: |
| 33 | + return None |
| 34 | + try: |
| 35 | + return json.loads(text[start:]) |
| 36 | + except json.JSONDecodeError: |
| 37 | + return None |
| 38 | + |
| 39 | + |
| 40 | +def _extract_usage(parsed: dict) -> dict: |
| 41 | + """Pull token/cost fields out of parsed claude JSON. Zero-default so callers |
| 42 | + don't need null checks. Always returns a complete dict shape.""" |
| 43 | + parsed = parsed or {} |
| 44 | + u = parsed.get("usage") or {} |
| 45 | + model_usage = parsed.get("modelUsage") or {} |
| 46 | + model = next(iter(model_usage), "") |
| 47 | + return { |
| 48 | + "model": model, |
| 49 | + "input_tokens": int(u.get("input_tokens") or 0), |
| 50 | + "output_tokens": int(u.get("output_tokens") or 0), |
| 51 | + "cache_read_tokens": int(u.get("cache_read_input_tokens") or 0), |
| 52 | + "cache_creation_tokens": int(u.get("cache_creation_input_tokens") or 0), |
| 53 | + "cost_usd": float(parsed.get("total_cost_usd") or 0.0), |
| 54 | + "duration_api_ms": int(parsed.get("duration_api_ms") or 0), |
| 55 | + } |
| 56 | + |
| 57 | + |
| 58 | +def _fmt_tok(n: int) -> str: |
| 59 | + """Format a token count as compact human-readable (e.g. 12.3k, 1.2M).""" |
| 60 | + n = int(n or 0) |
| 61 | + if n >= 1_000_000: |
| 62 | + return f"{n / 1_000_000:.1f}M" |
| 63 | + if n >= 1_000: |
| 64 | + return f"{n / 1_000:.1f}k" |
| 65 | + return str(n) |
| 66 | + |
14 | 67 | from ark.paths import get_config_dir |
15 | 68 | from ark.ui import ( |
16 | 69 | ElapsedTimer, RateLimitCountdown, agent_styled, styled, Style, Icons, |
@@ -434,7 +487,7 @@ def run_agent(self, agent_type: str, task: str, timeout: int = 1800, |
434 | 487 | "claude", "-p", full_prompt, |
435 | 488 | "--permission-mode", "bypassPermissions", |
436 | 489 | "--no-session-persistence", |
437 | | - "--output-format", "text", |
| 490 | + "--output-format", "json", |
438 | 491 | "--append-system-prompt", self._build_path_boundary(), |
439 | 492 | ] |
440 | 493 | ark_model = self._get_ark_model() |
@@ -469,10 +522,23 @@ def run_agent(self, agent_type: str, task: str, timeout: int = 1800, |
469 | 522 |
|
470 | 523 | timer.start() |
471 | 524 | result = "" |
| 525 | + usage_record = None # populated when claude returns parseable JSON |
472 | 526 |
|
473 | 527 | try: |
474 | 528 | stdout, stderr = process.communicate(timeout=timeout) |
475 | | - result = stdout |
| 529 | + # claude --output-format json: parse the envelope, extract `result` |
| 530 | + # field for downstream and `usage` for cost tracking. Fall back to |
| 531 | + # raw stdout on parse failure so the existing empty-run / failure |
| 532 | + # paths still trigger normally. |
| 533 | + if self.model == "claude": |
| 534 | + parsed = _parse_claude_json(stdout) |
| 535 | + if parsed is not None: |
| 536 | + result = parsed.get("result", "") or "" |
| 537 | + usage_record = _extract_usage(parsed) |
| 538 | + else: |
| 539 | + result = stdout |
| 540 | + else: |
| 541 | + result = stdout |
476 | 542 |
|
477 | 543 | if stderr: |
478 | 544 | stderr_lower = stderr.lower() |
@@ -517,7 +583,17 @@ def run_agent(self, agent_type: str, task: str, timeout: int = 1800, |
517 | 583 | timer.stop() |
518 | 584 | self.log(f"Agent {agent_type} timed out ({timeout}s)", "WARN") |
519 | 585 | stdout, _ = process.communicate() |
520 | | - result = stdout |
| 586 | + # JSON envelope is usually missing on timeout (truncated mid-stream). |
| 587 | + # Try once; on failure fall back to raw text and let empty-run handle it. |
| 588 | + if self.model == "claude": |
| 589 | + parsed = _parse_claude_json(stdout) |
| 590 | + if parsed is not None: |
| 591 | + result = parsed.get("result", "") or "" |
| 592 | + usage_record = _extract_usage(parsed) |
| 593 | + else: |
| 594 | + result = stdout |
| 595 | + else: |
| 596 | + result = stdout |
521 | 597 |
|
522 | 598 | watchdog.stop() |
523 | 599 | timer.stop() |
@@ -589,31 +665,78 @@ def run_agent(self, agent_type: str, task: str, timeout: int = 1800, |
589 | 665 | start_time = time.time() |
590 | 666 | continue |
591 | 667 | self.send_notification("Agent Error Failed", f"{agent_type}: {e}", priority="critical") |
592 | | - self._agent_stats.append({ |
| 668 | + err_stat = { |
593 | 669 | "agent_type": agent_type, |
594 | 670 | "elapsed_seconds": elapsed, |
595 | 671 | "prompt_len": 0, |
596 | 672 | "output_len": 0, |
597 | 673 | "timestamp": datetime.now().isoformat(), |
598 | 674 | "error": str(e), |
599 | | - }) |
| 675 | + # Zero-default cost fields so aggregation never sees missing keys |
| 676 | + "model": "", |
| 677 | + "input_tokens": 0, |
| 678 | + "output_tokens": 0, |
| 679 | + "cache_read_tokens": 0, |
| 680 | + "cache_creation_tokens": 0, |
| 681 | + "cost_usd": 0.0, |
| 682 | + "duration_api_ms": 0, |
| 683 | + } |
| 684 | + self._agent_stats.append(err_stat) |
| 685 | + try: |
| 686 | + self._write_cost_report() |
| 687 | + except Exception: |
| 688 | + pass |
600 | 689 | return "" |
601 | 690 |
|
602 | 691 | timer.stop() |
603 | 692 | self.log_step(f"{Icons.for_agent(agent_type)} {agent_styled(agent_type, f'[{agent_type}]')} completed ({elapsed}s)", "success") |
604 | 693 |
|
| 694 | + # One-line cost summary (only when claude returned parseable usage) |
| 695 | + if usage_record: |
| 696 | + in_tok = usage_record["input_tokens"] |
| 697 | + out_tok = usage_record["output_tokens"] |
| 698 | + cr = usage_record["cache_read_tokens"] |
| 699 | + cc = usage_record["cache_creation_tokens"] |
| 700 | + cached_in = cr + cc |
| 701 | + total_in = in_tok + cached_in |
| 702 | + hit_pct = int(100 * cr / total_in) if total_in else 0 |
| 703 | + self.log_step( |
| 704 | + f" 💰 ${usage_record['cost_usd']:.4f} " |
| 705 | + f"in:{_fmt_tok(in_tok)} out:{_fmt_tok(out_tok)} " |
| 706 | + f"cache:{_fmt_tok(cached_in)}({hit_pct}% hit)", |
| 707 | + "info" |
| 708 | + ) |
| 709 | + |
605 | 710 | # Agent summary |
606 | 711 | summary_items = self._summarize_agent_output(agent_type, result) |
607 | 712 | if summary_items: |
608 | 713 | self.log_summary_box(f"{agent_type.upper()} Summary", summary_items) |
609 | 714 |
|
610 | | - # Cost tracking |
611 | | - self._agent_stats.append({ |
| 715 | + # Cost tracking — extend with real token/cost when claude JSON was parsed |
| 716 | + stat = { |
612 | 717 | "agent_type": agent_type, |
613 | 718 | "elapsed_seconds": elapsed, |
614 | 719 | "prompt_len": len(full_prompt), |
615 | 720 | "output_len": len(result) if result else 0, |
616 | 721 | "timestamp": datetime.now().isoformat(), |
617 | | - }) |
| 722 | + # Zero-defaults so cost_report aggregation never sees missing keys |
| 723 | + "model": "", |
| 724 | + "input_tokens": 0, |
| 725 | + "output_tokens": 0, |
| 726 | + "cache_read_tokens": 0, |
| 727 | + "cache_creation_tokens": 0, |
| 728 | + "cost_usd": 0.0, |
| 729 | + "duration_api_ms": 0, |
| 730 | + } |
| 731 | + if usage_record: |
| 732 | + stat.update(usage_record) |
| 733 | + self._agent_stats.append(stat) |
| 734 | + |
| 735 | + # Live cost report — written after every agent so the webapp SSE stream |
| 736 | + # can pick up updates within ~2s. Failures here must never break the run. |
| 737 | + try: |
| 738 | + self._write_cost_report() |
| 739 | + except Exception as exc: |
| 740 | + self.log(f" cost report write failed: {exc}", "WARN") |
618 | 741 |
|
619 | 742 | return result |
0 commit comments