add new model pricing and prefix fallback support

2025-10-08 08:34:29 +08:00 · 2025-10-08 08:34:29 +08:00 · 3247b653c7
commit 3247b653c7
parent d2a214cd28
2 changed files with 43 additions and 7 deletions
--- a/app/llm/cost.py
+++ b/app/llm/cost.py
@ -60,14 +60,33 @@ class CostController:
    def __init__(self, limits: Optional[CostLimits] = None):
        """Initialize cost controller."""
        self.limits = limits or CostLimits.default()
+        # Maintain model-specific pricing in lowercase so lookups remain case-insensitive.
        self._costs: Dict[str, ModelCosts] = {
            "gpt-4": ModelCosts(0.03, 0.06),
            "gpt-4-32k": ModelCosts(0.06, 0.12),
+            "gpt-4o": ModelCosts(0.005, 0.015),
+            "gpt-4o-mini": ModelCosts(0.0006, 0.0018),
+            "gpt-4.1-mini": ModelCosts(0.0008, 0.002),
            "gpt-3.5-turbo": ModelCosts(0.0015, 0.002),
            "gpt-3.5-turbo-16k": ModelCosts(0.003, 0.004),
+            "gpt-3.5": ModelCosts(0.0015, 0.002),
            "llama2": ModelCosts(0.0, 0.0),
+            "llama3": ModelCosts(0.0, 0.0),
+            "phi3": ModelCosts(0.0, 0.0),
+            "qwen2": ModelCosts(0.0, 0.0),
            "codellama": ModelCosts(0.0, 0.0)
        }
+        # Family-level fallbacks ensure close variants (e.g. gpt-4o-mini-2024) are charged.
+        self._cost_prefixes: Dict[str, ModelCosts] = {
+            "gpt-4o": self._costs["gpt-4o"],
+            "gpt-4.1": self._costs["gpt-4.1-mini"],
+            "gpt-4": self._costs["gpt-4"],
+            "gpt-3.5": self._costs["gpt-3.5"],
+            "llama3": self._costs["llama3"],
+            "llama2": self._costs["llama2"],
+            "phi3": self._costs["phi3"],
+            "qwen2": self._costs["qwen2"],
+        }
        self._usage_lock = threading.Lock()
        self._usage: Dict[str, List[Dict[str, Any]]] = {
            "hourly": [],
@ -186,11 +205,29 @@ class CostController:
    def _calculate_cost(self, model: str, prompt_tokens: int,
                       completion_tokens: int) -> float:
        """Calculate the usage cost for the given token counts.

        Returns 0.0 when no pricing rule is found for ``model``; otherwise
        returns whatever the pricing rule's ``calculate`` yields for the
        prompt and completion token counts.
        """
-        model_costs = self._costs.get(model)
+        model_costs = self._resolve_model_cost(model)
        if not model_costs:
+            LOGGER.debug(
+                "Unknown model cost configuration: %s, using zero cost",
+                model,
+                extra=LOG_EXTRA,
+            )
            return 0.0
        return model_costs.calculate(prompt_tokens, completion_tokens)

+    def _resolve_model_cost(self, model: str) -> Optional[ModelCosts]:
+        """Resolve the pricing rule for a model, considering family prefixes.
+
+        The lookup is case-insensitive and ignores surrounding whitespace.
+        An exact match in ``self._costs`` wins. Otherwise the longest key of
+        ``self._costs`` or ``self._cost_prefixes`` that the name starts with
+        is used, so a dated variant such as ``gpt-4o-mini-2024`` resolves to
+        ``gpt-4o-mini`` instead of the shorter ``gpt-4o`` family entry.
+
+        Args:
+            model: Model name to price; may be empty.
+
+        Returns:
+            The matching pricing entry, or ``None`` when nothing matches.
+        """
+        if not model:
+            return None
+        key = model.strip().lower()
+        if not key:
+            return None
+        exact = self._costs.get(key)
+        if exact is not None:
+            return exact
+        # Merge both tables; family entries are listed last so they take
+        # precedence when a prefix is spelled the same as an exact name.
+        candidates = {**self._costs, **self._cost_prefixes}
+        matches = [prefix for prefix in candidates if key.startswith(prefix)]
+        if not matches:
+            return None
+        # The longest prefix is the most specific rule, independent of the
+        # order in which the tables were populated.
+        return candidates[max(matches, key=len)]
+
+
    def _check_budget_limits(self, model: str, prompt_tokens: int,
                           completion_tokens: int) -> bool:
        """检查是否超出预算限制."""
--- a/app/llm/templates.py
+++ b/app/llm/templates.py
@ -62,12 +62,11 @@ class PromptTemplate:
            raise ValueError(f"Missing template variable: {e}")

        # Truncate if needed, preserving exact number of characters
-        if len(result) > self.max_length:
-            target = self.max_length - 3  # Reserve space for "..."
-            if target > 0:  # Only truncate if we have space for content
-                result = result[:target] + "..."
+        if self.max_length > 0 and len(result) > self.max_length:
+            if self.max_length >= 3:
+                result = result[: self.max_length - 3] + "..."
            else:
-                result = "..."  # If max_length <= 3, just return "..."
+                result = result[: self.max_length]

        return result