add new model pricing and prefix fallback support

This commit is contained in:
sam 2025-10-08 08:34:29 +08:00
parent d2a214cd28
commit 3247b653c7
2 changed files with 43 additions and 7 deletions

View File

@ -60,14 +60,33 @@ class CostController:
def __init__(self, limits: Optional[CostLimits] = None): def __init__(self, limits: Optional[CostLimits] = None):
"""Initialize cost controller.""" """Initialize cost controller."""
self.limits = limits or CostLimits.default() self.limits = limits or CostLimits.default()
# Maintain model-specific pricing in lowercase so lookups remain case-insensitive.
self._costs: Dict[str, ModelCosts] = { self._costs: Dict[str, ModelCosts] = {
"gpt-4": ModelCosts(0.03, 0.06), "gpt-4": ModelCosts(0.03, 0.06),
"gpt-4-32k": ModelCosts(0.06, 0.12), "gpt-4-32k": ModelCosts(0.06, 0.12),
"gpt-4o": ModelCosts(0.005, 0.015),
"gpt-4o-mini": ModelCosts(0.0006, 0.0018),
"gpt-4.1-mini": ModelCosts(0.0008, 0.002),
"gpt-3.5-turbo": ModelCosts(0.0015, 0.002), "gpt-3.5-turbo": ModelCosts(0.0015, 0.002),
"gpt-3.5-turbo-16k": ModelCosts(0.003, 0.004), "gpt-3.5-turbo-16k": ModelCosts(0.003, 0.004),
"gpt-3.5": ModelCosts(0.0015, 0.002),
"llama2": ModelCosts(0.0, 0.0), "llama2": ModelCosts(0.0, 0.0),
"llama3": ModelCosts(0.0, 0.0),
"phi3": ModelCosts(0.0, 0.0),
"qwen2": ModelCosts(0.0, 0.0),
"codellama": ModelCosts(0.0, 0.0) "codellama": ModelCosts(0.0, 0.0)
} }
# Family-level fallbacks ensure close variants (e.g. gpt-4o-mini-2024) are charged.
self._cost_prefixes: Dict[str, ModelCosts] = {
"gpt-4o": self._costs["gpt-4o"],
"gpt-4.1": self._costs["gpt-4.1-mini"],
"gpt-4": self._costs["gpt-4"],
"gpt-3.5": self._costs["gpt-3.5"],
"llama3": self._costs["llama3"],
"llama2": self._costs["llama2"],
"phi3": self._costs["phi3"],
"qwen2": self._costs["qwen2"],
}
self._usage_lock = threading.Lock() self._usage_lock = threading.Lock()
self._usage: Dict[str, List[Dict[str, Any]]] = { self._usage: Dict[str, List[Dict[str, Any]]] = {
"hourly": [], "hourly": [],
@ -186,11 +205,29 @@ class CostController:
def _calculate_cost(self, model: str, prompt_tokens: int, def _calculate_cost(self, model: str, prompt_tokens: int,
completion_tokens: int) -> float: completion_tokens: int) -> float:
"""计算使用成本.""" """计算使用成本."""
model_costs = self._costs.get(model) model_costs = self._resolve_model_cost(model)
if not model_costs: if not model_costs:
LOGGER.debug(
"Unknown model cost configuration: %s, using zero cost",
model,
extra=LOG_EXTRA,
)
return 0.0 return 0.0
return model_costs.calculate(prompt_tokens, completion_tokens) return model_costs.calculate(prompt_tokens, completion_tokens)
def _resolve_model_cost(self, model: str) -> Optional[ModelCosts]:
"""Resolve the pricing rule for a model, considering family prefixes."""
if not model:
return None
key = model.lower().strip()
if key in self._costs:
return self._costs[key]
for prefix, costs in self._cost_prefixes.items():
if key.startswith(prefix):
return costs
return None
def _check_budget_limits(self, model: str, prompt_tokens: int, def _check_budget_limits(self, model: str, prompt_tokens: int,
completion_tokens: int) -> bool: completion_tokens: int) -> bool:
"""检查是否超出预算限制.""" """检查是否超出预算限制."""

View File

@ -62,13 +62,12 @@ class PromptTemplate:
raise ValueError(f"Missing template variable: {e}") raise ValueError(f"Missing template variable: {e}")
# Truncate if needed, preserving exact number of characters # Truncate if needed, preserving exact number of characters
if len(result) > self.max_length: if self.max_length > 0 and len(result) > self.max_length:
target = self.max_length - 3 # Reserve space for "..." if self.max_length >= 3:
if target > 0: # Only truncate if we have space for content result = result[: self.max_length - 3] + "..."
result = result[:target] + "..."
else: else:
result = "..." # If max_length <= 3, just return "..." result = result[: self.max_length]
return result return result