@@ -55,6 +55,87 @@ def _extract_usage(parsed: dict) -> dict:
5555 }
5656
5757
58+ def _parse_gemini_json (stdout : str ) -> dict | None :
59+ """Parse output of `gemini -o json`. Returns None on failure."""
60+ text = (stdout or "" ).strip ()
61+ if not text :
62+ return None
63+ try :
64+ # gemini -o json usually outputs the JSON object directly,
65+ # but may have leading "Loaded cached credentials" etc.
66+ if "{" in text :
67+ text = text [text .find ("{" ):]
68+ return json .loads (text )
69+ except json .JSONDecodeError :
70+ return None
71+
72+
73+ def _calculate_gemini_cost (model_id : str , input_tok : int , output_tok : int ) -> float :
74+ """
75+ Calculate estimated cost for Gemini models (April 2026 pricing).
76+ """
77+ input_tok = int (input_tok or 0 )
78+ output_tok = int (output_tok or 0 )
79+ model_lower = (model_id or "" ).lower ()
80+
81+ # Pricing per 1M tokens
82+ if "3.1-pro" in model_lower :
83+ in_rate = 2.00
84+ out_rate = 12.00
85+ elif "3.1-flash" in model_lower :
86+ in_rate = 0.50
87+ out_rate = 3.00
88+ else :
89+ # Default to pro
90+ in_rate = 2.00
91+ out_rate = 12.00
92+
93+ return (input_tok / 1_000_000 * in_rate ) + (output_tok / 1_000_000 * out_rate )
94+
95+
def _extract_gemini_usage(parsed: dict) -> dict:
    """Aggregate token usage info from Gemini CLI's nested stats schema.

    Sums token counts and API latency across every model entry under
    ``stats.models`` (falling back to ``stats.model``), picks a "main"
    model for attribution, and returns a flat usage record.
    """
    stats = (parsed or {}).get("stats", {})
    per_model = stats.get("models") or stats.get("model") or {}

    # Running totals across all model entries.
    totals = {"input": 0, "candidates": 0, "cached": 0, "thoughts": 0}
    latency_ms = 0
    main_model = ""

    for model_id, entry in per_model.items():
        tokens = entry.get("tokens", {})
        for key in totals:
            totals[key] += int(tokens.get(key) or 0)

        latency_ms += int(entry.get("api", {}).get("totalLatencyMs") or 0)

        # Heuristic: the entry whose roles include "main" produced
        # the actual response.
        if "roles" in entry and "main" in entry["roles"]:
            main_model = model_id

    # No explicit "main" role — fall back to the first entry.
    if not main_model and per_model:
        main_model = next(iter(per_model))

    return {
        "model": main_model,
        "input_tokens": totals["input"],
        "output_tokens": totals["candidates"],
        "cache_read_tokens": totals["cached"],
        "cache_creation_tokens": 0,  # Gemini schema doesn't distinguish creation
        "cost_usd": _calculate_gemini_cost(
            main_model, totals["input"], totals["candidates"]
        ),
        "duration_api_ms": latency_ms,
    }
137+
138+
58139def _fmt_tok (n : int ) -> str :
59140 """Format a token count as compact human-readable (e.g. 12.3k, 1.2M)."""
60141 n = int (n or 0 )
@@ -485,16 +566,26 @@ def run_agent(self, agent_type: str, task: str, timeout: int = 1800,
485566 for attempt in range (1 , MAX_RETRIES + 1 ):
486567 try :
487568 cmd = []
488- # Strip CLAUDECODE env var to prevent nested-session detection
489- env = {k : v for k , v in os .environ .items () if k != "CLAUDECODE" }
569+ # Strip CLAUDECODE to prevent nested-session detection.
570+ # Strip GEMINI_API_KEY / GOOGLE_API_KEY so the Gemini CLI uses
571+ # OAuth credentials from ~/.gemini/oauth_creds.json rather than
572+ # the API key (which is only for Deep Research via Python API).
573+ _strip = {"CLAUDECODE" , "GEMINI_API_KEY" , "GOOGLE_API_KEY" }
574+ env = {k : v for k , v in os .environ .items () if k not in _strip }
490575 if self .model == "gemini" :
491576 boundary = self ._build_path_boundary ()
492577 cmd = [
493578 "gemini" ,
494- "-m " , "auto " ,
579+ "-p " , f"[SYSTEM RULE] { boundary } \n \n { full_prompt } " ,
495580 "--approval-mode" , "auto_edit" ,
496- f"[SYSTEM RULE] { boundary } \n \n { full_prompt } " ,
581+ "-o" , "json " ,
497582 ]
583+ # Respect model_variant if set
584+ ark_model = self ._get_ark_model ()
585+ if ark_model :
586+ cmd .extend (["-m" , ark_model ])
587+ else :
588+ cmd .extend (["-m" , "auto" ])
498589 elif self .model == "claude" :
499590 cmd = [
500591 "claude" , "-p" , full_prompt ,
@@ -518,6 +609,9 @@ def run_agent(self, agent_type: str, task: str, timeout: int = 1800,
518609 self .log (f"Unsupported model backend: { self .model } " , "ERROR" )
519610 return ""
520611
612+ ark_model = self ._get_ark_model ()
613+ self .log (f"Backend model: { self .model } | Model: { ark_model or 'default' } " , "INFO" )
614+
521615 process = subprocess .Popen (
522616 cmd ,
523617 stdin = subprocess .DEVNULL , # Don't hold terminal pty fd
@@ -552,6 +646,13 @@ def run_agent(self, agent_type: str, task: str, timeout: int = 1800,
552646 usage_record = _extract_usage (parsed )
553647 else :
554648 result = stdout
649+ elif self .model == "gemini" :
650+ parsed = _parse_gemini_json (stdout )
651+ if parsed is not None :
652+ result = parsed .get ("response" , "" ) or ""
653+ usage_record = _extract_gemini_usage (parsed )
654+ else :
655+ result = stdout
555656 else :
556657 result = stdout
557658
@@ -614,6 +715,13 @@ def run_agent(self, agent_type: str, task: str, timeout: int = 1800,
614715 usage_record = _extract_usage (parsed )
615716 else :
616717 result = stdout
718+ elif self .model == "gemini" :
719+ parsed = _parse_gemini_json (stdout )
720+ if parsed is not None :
721+ result = parsed .get ("response" , "" ) or ""
722+ usage_record = _extract_gemini_usage (parsed )
723+ else :
724+ result = stdout
617725 else :
618726 result = stdout
619727
0 commit comments