add new model pricing and prefix fallback support
This commit is contained in:
parent
d2a214cd28
commit
3247b653c7
@ -60,14 +60,33 @@ class CostController:
|
||||
def __init__(self, limits: Optional[CostLimits] = None):
|
||||
"""Initialize cost controller."""
|
||||
self.limits = limits or CostLimits.default()
|
||||
# Maintain model-specific pricing in lowercase so lookups remain case-insensitive.
|
||||
self._costs: Dict[str, ModelCosts] = {
|
||||
"gpt-4": ModelCosts(0.03, 0.06),
|
||||
"gpt-4-32k": ModelCosts(0.06, 0.12),
|
||||
"gpt-4o": ModelCosts(0.005, 0.015),
|
||||
"gpt-4o-mini": ModelCosts(0.0006, 0.0018),
|
||||
"gpt-4.1-mini": ModelCosts(0.0008, 0.002),
|
||||
"gpt-3.5-turbo": ModelCosts(0.0015, 0.002),
|
||||
"gpt-3.5-turbo-16k": ModelCosts(0.003, 0.004),
|
||||
"gpt-3.5": ModelCosts(0.0015, 0.002),
|
||||
"llama2": ModelCosts(0.0, 0.0),
|
||||
"llama3": ModelCosts(0.0, 0.0),
|
||||
"phi3": ModelCosts(0.0, 0.0),
|
||||
"qwen2": ModelCosts(0.0, 0.0),
|
||||
"codellama": ModelCosts(0.0, 0.0)
|
||||
}
|
||||
# Family-level fallbacks ensure close variants (e.g. gpt-4o-mini-2024) are charged.
|
||||
self._cost_prefixes: Dict[str, ModelCosts] = {
|
||||
"gpt-4o": self._costs["gpt-4o"],
|
||||
"gpt-4.1": self._costs["gpt-4.1-mini"],
|
||||
"gpt-4": self._costs["gpt-4"],
|
||||
"gpt-3.5": self._costs["gpt-3.5"],
|
||||
"llama3": self._costs["llama3"],
|
||||
"llama2": self._costs["llama2"],
|
||||
"phi3": self._costs["phi3"],
|
||||
"qwen2": self._costs["qwen2"],
|
||||
}
|
||||
self._usage_lock = threading.Lock()
|
||||
self._usage: Dict[str, List[Dict[str, Any]]] = {
|
||||
"hourly": [],
|
||||
@ -186,11 +205,29 @@ class CostController:
|
||||
def _calculate_cost(self, model: str, prompt_tokens: int,
                    completion_tokens: int) -> float:
    """Return the usage cost of one call to ``model``.

    The pricing rule is looked up through ``self._resolve_model_cost``.
    When no rule is resolved, the miss is logged at debug level and the
    call is treated as free.

    Args:
        model: Name of the model to price.
        prompt_tokens: Prompt token count, forwarded to the rule's
            ``calculate`` method.
        completion_tokens: Completion token count, forwarded to the
            rule's ``calculate`` method.

    Returns:
        The value of ``calculate(prompt_tokens, completion_tokens)`` on
        the resolved rule, or ``0.0`` when no rule is resolved.
    """
    # Single lookup: the resolver already covers the exact-name case, so a
    # separate ``self._costs.get(model)`` beforehand would be a dead store.
    model_costs = self._resolve_model_cost(model)
    if not model_costs:
        LOGGER.debug(
            "Unknown model cost configuration: %s, using zero cost",
            model,
            extra=LOG_EXTRA,
        )
        return 0.0
    return model_costs.calculate(prompt_tokens, completion_tokens)
|
||||
|
||||
def _resolve_model_cost(self, model: str) -> Optional[ModelCosts]:
    """Resolve the pricing rule for a model, considering family prefixes.

    The name is lower-cased and stripped of surrounding whitespace, then
    looked up in ``self._costs``. If there is no exact entry, the prefixes
    in ``self._cost_prefixes`` are tried and the longest one that the name
    starts with wins, so the outcome does not depend on the order in which
    the prefixes were inserted (e.g. "gpt-4o-2024" resolves to the
    "gpt-4o" family even if "gpt-4" is listed first).

    Args:
        model: Model name as supplied by the caller; may be empty or None.

    Returns:
        The matching pricing rule, or ``None`` when ``model`` is falsy or
        neither an exact entry nor a prefix matches.
    """
    if not model:
        return None

    key = model.lower().strip()
    if key in self._costs:
        return self._costs[key]

    # Several family prefixes can match the same name ("gpt-4" and
    # "gpt-4o" both prefix "gpt-4o-mini-x"); prefer the most specific one.
    candidates = [
        prefix for prefix in self._cost_prefixes if key.startswith(prefix)
    ]
    if not candidates:
        return None
    return self._cost_prefixes[max(candidates, key=len)]
|
||||
|
||||
def _check_budget_limits(self, model: str, prompt_tokens: int,
|
||||
completion_tokens: int) -> bool:
|
||||
"""检查是否超出预算限制."""
|
||||
|
||||
@ -62,12 +62,11 @@ class PromptTemplate:
|
||||
raise ValueError(f"Missing template variable: {e}")
|
||||
|
||||
# Truncate if needed, preserving exact number of characters
|
||||
if len(result) > self.max_length:
|
||||
target = self.max_length - 3 # Reserve space for "..."
|
||||
if target > 0: # Only truncate if we have space for content
|
||||
result = result[:target] + "..."
|
||||
if self.max_length > 0 and len(result) > self.max_length:
|
||||
if self.max_length >= 3:
|
||||
result = result[: self.max_length - 3] + "..."
|
||||
else:
|
||||
result = "..." # If max_length <= 3, just return "..."
|
||||
result = result[: self.max_length]
|
||||
|
||||
return result
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user