"""Multi-agent decision game implementation."""
|
|
from __future__ import annotations
|
|
|
|
from dataclasses import dataclass
|
|
from math import exp, log
|
|
from typing import Dict, Iterable, List, Mapping, Tuple
|
|
|
|
from .base import Agent, AgentAction, AgentContext, UtilityMatrix
|
|
from .registry import weight_map
|
|
|
|
|
|
# Full action space considered by the decision functions below.
ACTIONS: Tuple[AgentAction, ...] = (
    AgentAction.SELL,
    AgentAction.HOLD,
    AgentAction.BUY_S,
    AgentAction.BUY_M,
    AgentAction.BUY_L,
)


def _clamp(value: float) -> float:
    """Clamp a raw agent score into the [0.0, 1.0] range."""
    return max(0.0, min(1.0, value))


@dataclass
class Decision:
    """Result of a single group decision round."""

    action: AgentAction
    confidence: float
    target_weight: float
    feasible_actions: List[AgentAction]
    utilities: UtilityMatrix


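# A ``UtilityMatrix`` maps each action to a mapping of agent name -> clamped
# score, e.g. ``{AgentAction.BUY_S: {"risk": 0.6, "trend": 0.7}}`` (the agent
# names in that example are illustrative only).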
def compute_utilities(agents: Iterable[Agent], context: AgentContext) -> UtilityMatrix:
    """Score every action with every agent, clamping each score to [0, 1]."""
    utilities: UtilityMatrix = {}
    for action in ACTIONS:
        utilities[action] = {}
        for agent in agents:
            score = _clamp(agent.score(context, action))
            utilities[action][agent.name] = score
    return utilities


def feasible_actions(agents: Iterable[Agent], context: AgentContext) -> List[AgentAction]:
    """Return the actions that every agent deems feasible in this context."""
    feas: List[AgentAction] = []
    for action in ACTIONS:
        if all(agent.feasible(context, action) for agent in agents):
            feas.append(action)
    return feas


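# Aggregation note: ``nash_bargain`` below maximises the weighted Nash product
#     argmax_a  prod_i (u_i(a) - d_i) ** w_i
# computed in log space as sum_i w_i * log(u_i(a) - d_i), where u_i(a) is
# agent i's clamped utility for action a, d_i its disagreement utility, and
# w_i its weight. Any weighted agent that fails to beat its disagreement
# point invalidates the action, so every agent effectively holds a veto.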
def nash_bargain(
    utilities: UtilityMatrix,
    weights: Mapping[str, float],
    disagreement: Mapping[str, float],
) -> Tuple[AgentAction, float]:
    """Pick the action that maximises the weighted Nash product.

    Returns ``(HOLD, 0.0)`` when no action improves on the disagreement
    point for every weighted agent.
    """
    best_action = AgentAction.HOLD
    best_score = float("-inf")
    for action, agent_scores in utilities.items():
        log_product = 0.0
        valid = True
        for agent_name, score in agent_scores.items():
            w = weights.get(agent_name, 0.0)
            if w == 0:
                continue
            gap = score - disagreement.get(agent_name, 0.0)
            if gap <= 0:
                # A weighted agent at or below its disagreement utility
                # vetoes the action.
                valid = False
                break
            log_product += w * log(gap)
        if not valid:
            continue
        if log_product > best_score:
            best_score = log_product
            best_action = action
    if best_score == float("-inf"):
        return AgentAction.HOLD, 0.0
    confidence = _aggregate_confidence(utilities[best_action], weights)
    return best_action, confidence


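# ``vote`` is the simpler aggregation, used when method="vote" or as a
# fallback when bargaining yields an infeasible action: each action's score
# is the weighted sum of agent utilities. For example, with weights
# {"a": 0.6, "b": 0.4} and utilities {"a": 0.8, "b": 0.2}, an action scores
# 0.6 * 0.8 + 0.4 * 0.2 = 0.56. The agent names here are illustrative only.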
def vote(utilities: UtilityMatrix, weights: Mapping[str, float]) -> Tuple[AgentAction, float]:
    """Pick the action with the highest weighted sum of agent utilities."""
    scores: Dict[AgentAction, float] = {}
    for action, agent_scores in utilities.items():
        scores[action] = sum(weights.get(agent, 0.0) * score for agent, score in agent_scores.items())
    best_action = max(scores, key=scores.get)
    confidence = _aggregate_confidence(utilities[best_action], weights)
    return best_action, confidence


def _aggregate_confidence(agent_scores: Mapping[str, float], weights: Mapping[str, float]) -> float:
    """Return the weight-normalised average of the agents' scores."""
    total = sum(weights.values())
    if total <= 0:
        return 0.0
    weighted = sum(weights.get(agent, 0.0) * score for agent, score in agent_scores.items())
    return weighted / total


def target_weight_for_action(action: AgentAction) -> float:
    """Map each action to its target weight."""
    mapping = {
        AgentAction.SELL: -1.0,
        AgentAction.HOLD: 0.0,
        AgentAction.BUY_S: 0.01,
        AgentAction.BUY_M: 0.02,
        AgentAction.BUY_L: 0.03,
    }
    return mapping[action]


def decide(
    context: AgentContext,
    agents: Iterable[Agent],
    weights: Mapping[str, float],
    method: str = "nash",
) -> Decision:
    """Run one decision round: score, filter by feasibility, aggregate, size."""
    agent_list = list(agents)
    norm_weights = weight_map(dict(weights))
    utilities = compute_utilities(agent_list, context)
    feas_actions = feasible_actions(agent_list, context)
    if not feas_actions:
        return Decision(AgentAction.HOLD, 0.0, 0.0, [], utilities)

    filtered_utilities = {action: utilities[action] for action in feas_actions}
    hold_scores = utilities.get(AgentAction.HOLD, {})

    if method == "vote":
        action, confidence = vote(filtered_utilities, norm_weights)
    else:
        # Nash bargaining uses the HOLD utilities as the disagreement point.
        action, confidence = nash_bargain(filtered_utilities, norm_weights, hold_scores)
    if action not in feas_actions:
        # Fall back to voting when the chosen action was filtered out
        # (bargaining can return HOLD even if HOLD is infeasible).
        action, confidence = vote(filtered_utilities, norm_weights)

    weight = target_weight_for_action(action)
    return Decision(action, confidence, weight, feas_actions, utilities)
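

if __name__ == "__main__":
    # Minimal illustrative sketch, not part of the library API: it exercises
    # the two aggregation rules with hand-built utilities so it does not need
    # real Agent or AgentContext instances. Agent names, scores, and weights
    # below are made up for demonstration only.
    demo_utilities: UtilityMatrix = {
        AgentAction.HOLD: {"risk": 0.5, "trend": 0.4},
        AgentAction.BUY_S: {"risk": 0.6, "trend": 0.7},
        AgentAction.BUY_M: {"risk": 0.4, "trend": 0.9},
    }
    demo_weights = {"risk": 0.5, "trend": 0.5}
    # Use the HOLD utilities as the disagreement point, mirroring decide().
    disagreement = demo_utilities[AgentAction.HOLD]
    print("nash_bargain:", nash_bargain(demo_utilities, demo_weights, disagreement))
    print("vote:", vote(demo_utilities, demo_weights))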