22
33from __future__ import annotations
44
5+ import json
56import os
7+ from functools import lru_cache
8+ from importlib .resources import files
69from typing import Optional
710
811from pydantic import BaseModel , Field
912
1013
@lru_cache(maxsize=1)
def load_pricing_catalog() -> dict:
    """Load the versioned pricing catalog from package data.

    The result is cached for the lifetime of the process (maxsize=1: the
    function takes no arguments, so there is only one cache entry).

    Returns:
        The parsed JSON catalog. Module-level consumers expect at least a
        "version" string and a "models" mapping.

    Raises:
        FileNotFoundError: if the packaged data file is missing.
        json.JSONDecodeError: if the data file is not valid JSON.
    """
    # Chain single-segment joinpath() calls: passing a multi-segment string
    # like "data/model_pricing.v1.json" to Traversable.joinpath is only
    # guaranteed on Python 3.12+; single segments work on all supported
    # importlib.resources backends (incl. zip imports).
    catalog_path = files("tokenwise").joinpath("data").joinpath("model_pricing.v1.json")
    return json.loads(catalog_path.read_text(encoding="utf-8"))
20+
# Catalog snapshot loaded once at import time; all derived tables below
# are built from this single source of truth.
PRICING_CATALOG = load_pricing_catalog()
PRICING_VERSION = PRICING_CATALOG["version"]

# Per-token pricing in USD (per 1K tokens), flattened from the catalog
# into the {model: {"input": rate, "output": rate}} shape callers expect.
MODEL_PRICING: dict[str, dict[str, float]] = {
    name: {"input": entry["input"], "output": entry["output"]}
    for name, entry in PRICING_CATALOG["models"].items()
}
3027
3128# Characters-per-token ratio heuristics by model family
3835 "default" : 3.7 ,
3936}
4037
41- # Default context window sizes
# Context-window sizes keyed by model name, derived from the same catalog
# that backs MODEL_PRICING so the two tables can never drift apart.
MODEL_CONTEXT_WINDOWS: dict[str, int] = {
    name: entry["context_window"]
    for name, entry in PRICING_CATALOG["models"].items()
}
6042
6143# Default budget settings
@@ -84,6 +66,7 @@ class TokenWiseConfig(BaseModel):
    # Spending cap and alert threshold; defaults come from the module-level
    # DEFAULT_BUDGET settings.
    monthly_budget_usd: float = Field(default=DEFAULT_BUDGET["monthly_limit_usd"])
    alert_threshold_pct: int = Field(default=DEFAULT_BUDGET["alert_threshold_pct"])
    # Optional per-model pricing overrides; get_pricing consults these before
    # falling back to MODEL_PRICING. Shape mirrors MODEL_PRICING:
    # {model: {"input": rate, "output": rate}}.
    custom_pricing: Optional[dict[str, dict[str, float]]] = None
    # Version string of the pricing catalog the defaults were derived from.
    pricing_version: str = Field(default=PRICING_VERSION)
8770
8871 def get_pricing (self , model : str ) -> dict [str , float ]:
8972 """Return pricing dict for a model, checking custom overrides first."""
@@ -101,3 +84,11 @@ def get_tokenizer_ratio(self, model: str) -> float:
10184 if family in model .lower ():
10285 return ratio
10386 return TOKENIZER_RATIOS ["default" ]
87+
88+ def get_context_window (self , model : str ) -> int :
89+ """Return the context window for a model."""
90+ if model in MODEL_CONTEXT_WINDOWS :
91+ return MODEL_CONTEXT_WINDOWS [model ]
92+ raise ValueError (
93+ f"Unknown model '{ model } '. Available: { ', ' .join (MODEL_CONTEXT_WINDOWS .keys ())} "
94+ )
0 commit comments