667 lines
26 KiB
Python
667 lines
26 KiB
Python
"""Extended factor implementations for the quant system.
|
||
|
||
This module contains additional high-quality factors that extend the default factor set.
|
||
All factors are designed to be lightweight and programmatically generated to meet
|
||
end-to-end automated decision-making requirements.
|
||
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
from dataclasses import dataclass
|
||
from typing import Dict, List, Sequence, Optional, Any
|
||
import functools
|
||
|
||
import numpy as np
|
||
|
||
from app.utils.logging import get_logger
|
||
|
||
LOGGER = get_logger(__name__)
|
||
LOG_EXTRA = {"stage": "extended_factors"}
|
||
|
||
|
||
def handle_factor_errors(func: Any) -> Any:
|
||
"""装饰器:处理因子计算过程中的错误
|
||
|
||
Args:
|
||
func: 要装饰的函数
|
||
|
||
Returns:
|
||
装饰后的函数
|
||
"""
|
||
@functools.wraps(func)
|
||
def wrapper(*args: Any, **kwargs: Any) -> Optional[float]:
|
||
try:
|
||
return func(*args, **kwargs)
|
||
except Exception as e:
|
||
# 获取因子名称(如果可能)
|
||
factor_name = "unknown"
|
||
if len(args) > 2 and isinstance(args[1], str):
|
||
factor_name = args[1]
|
||
elif "factor_name" in kwargs:
|
||
factor_name = kwargs["factor_name"]
|
||
|
||
LOGGER.error(
|
||
"计算因子出错 name=%s error=%s",
|
||
factor_name,
|
||
str(e),
|
||
exc_info=True,
|
||
extra=LOG_EXTRA
|
||
)
|
||
return None
|
||
return wrapper
|
||
|
||
from app.core.indicators import momentum, rolling_mean, normalize
|
||
from app.core.technical import (
|
||
rsi, macd, bollinger_bands, obv_momentum, price_volume_trend
|
||
)
|
||
from app.core.momentum import (
|
||
adaptive_momentum, momentum_quality, momentum_regime
|
||
)
|
||
from app.core.volatility import (
|
||
volatility, garch_volatility, volatility_regime,
|
||
volume_price_correlation
|
||
)
|
||
from app.features.validation import validate_factor_value
|
||
|
||
|
||
@dataclass
|
||
class FactorSpec:
|
||
"""Specification for a factor computation.
|
||
|
||
Attributes:
|
||
name: Factor name identifier
|
||
window: Required lookback window (0 for snapshot-only factors)
|
||
"""
|
||
name: str
|
||
window: int
|
||
|
||
|
||
# Extended factors focusing on momentum, value, and liquidity signals
|
||
EXTENDED_FACTORS: List[FactorSpec] = [
|
||
# 技术分析因子
|
||
FactorSpec("tech_rsi_14", 14), # 14日RSI
|
||
FactorSpec("tech_macd_signal", 26), # MACD信号
|
||
FactorSpec("tech_bb_position", 20), # 布林带位置
|
||
FactorSpec("tech_obv_momentum", 20), # OBV动量
|
||
FactorSpec("tech_pv_trend", 20), # 价量趋势
|
||
|
||
# 趋势跟踪因子
|
||
FactorSpec("trend_ma_cross", 20), # 均线交叉信号
|
||
FactorSpec("trend_price_channel", 20), # 价格通道突破
|
||
FactorSpec("trend_adx", 14), # 简化版平均趋向指标(近似)
|
||
|
||
# 市场微观结构因子
|
||
FactorSpec("micro_tick_direction", 5), # 逐笔方向
|
||
FactorSpec("micro_trade_imbalance", 10), # 交易不平衡度(基于签名成交量)
|
||
|
||
# 波动率预测因子
|
||
FactorSpec("vol_garch", 20), # GARCH波动率
|
||
FactorSpec("vol_range_pred", 10), # 波动区间预测
|
||
FactorSpec("vol_regime", 20), # 波动率状态
|
||
|
||
# 量价联合因子
|
||
FactorSpec("volume_price_corr", 20), # 量价相关性
|
||
FactorSpec("volume_price_diverge", 10), # 量价背离
|
||
FactorSpec("volume_intensity", 5), # 成交强度
|
||
|
||
# 增强动量因子
|
||
FactorSpec("momentum_adaptive", 20), # 自适应动量
|
||
FactorSpec("momentum_regime", 20), # 动量区间
|
||
FactorSpec("momentum_quality", 20), # 动量质量,
|
||
|
||
# 价格均线比率因子
|
||
FactorSpec("price_ma_10_ratio", 10), # 当前价格与10日均线比率
|
||
FactorSpec("price_ma_20_ratio", 20), # 当前价格与20日均线比率
|
||
FactorSpec("price_ma_60_ratio", 60), # 当前价格与60日均线比率
|
||
|
||
# 成交量均线比率因子
|
||
FactorSpec("volume_ma_5_ratio", 5), # 当前成交量与5日均线比率
|
||
FactorSpec("volume_ma_20_ratio", 20), # 当前成交量与20日均线比率
|
||
|
||
# 估值因子
|
||
FactorSpec("val_ps_score", 0), # PS估值分数
|
||
FactorSpec("val_multiscore", 0), # 多维估值分数
|
||
FactorSpec("val_dividend_score", 0), # 股息评分
|
||
|
||
# 市场状态因子
|
||
FactorSpec("market_regime", 20), # 市场状态
|
||
FactorSpec("trend_strength", 20), # 趋势强度
|
||
|
||
# 风险因子
|
||
FactorSpec("risk_penalty", 20), # 风险惩罚因子
|
||
]
|
||
|
||
|
||
class ExtendedFactors:
|
||
"""扩展因子计算实现类。
|
||
|
||
该类实现了一组用于量化交易的扩展因子计算。包括:
|
||
1. 技术分析因子 (RSI, MACD, 布林带等)
|
||
2. 趋势跟踪因子 (均线交叉等)
|
||
3. 波动率预测因子 (GARCH, 波动率状态等)
|
||
4. 量价联合因子 (量价相关性等)
|
||
5. 动量强化因子 (自适应动量等)
|
||
6. 均线比率因子 (价格/成交量均线比率)
|
||
|
||
使用示例:
|
||
calculator = ExtendedFactors()
|
||
factor_value = calculator.compute_factor(
|
||
"tech_rsi_14",
|
||
close_series,
|
||
volume_series
|
||
)
|
||
all_factors = calculator.compute_all_factors(close_series, volume_series)
|
||
normalized = calculator.normalize_factors(all_factors)
|
||
|
||
属性:
|
||
factor_specs: Dict[str, FactorSpec], 因子名称到因子规格的映射
|
||
"""
|
||
|
||
def __init__(self):
|
||
"""初始化因子计算器,构建因子规格映射"""
|
||
self.factor_specs = {spec.name: spec for spec in EXTENDED_FACTORS}
|
||
# 关闭初始化日志打印
|
||
# LOGGER.info(
|
||
# "初始化因子计算器,加载因子数量: %d",
|
||
# len(self.factor_specs),
|
||
# extra=LOG_EXTRA
|
||
# )
|
||
|
||
@handle_factor_errors
|
||
def compute_factor(self,
|
||
factor_name: str,
|
||
close_series: Sequence[float],
|
||
volume_series: Sequence[float]) -> Optional[float]:
|
||
"""计算单个因子值
|
||
|
||
Args:
|
||
factor_name: 因子名称,必须是已注册的因子
|
||
close_series: 收盘价序列,从新到旧排序
|
||
volume_series: 成交量序列,从新到旧排序
|
||
|
||
Returns:
|
||
factor_value: Optional[float], 计算得到的因子值,失败时返回None
|
||
|
||
Raises:
|
||
ValueError: 当因子名称未知或数据不足时抛出
|
||
"""
|
||
spec = self.factor_specs.get(factor_name)
|
||
if spec is None:
|
||
raise ValueError(f"未知的因子名称: {factor_name}")
|
||
|
||
if len(close_series) < spec.window:
|
||
raise ValueError(
|
||
f"数据长度不足: 需要{spec.window},实际{len(close_series)}"
|
||
)
|
||
|
||
# 技术分析因子
|
||
if factor_name == "tech_rsi_14":
|
||
return rsi(close_series, 14)
|
||
|
||
elif factor_name == "tech_macd_signal":
|
||
# MACD柱状图值(histogram)
|
||
return macd(close_series, 12, 26, 9)
|
||
|
||
elif factor_name == "tech_bb_position":
|
||
# 价格在布林带中的位置(-1到1)
|
||
return bollinger_bands(close_series, 20)
|
||
|
||
elif factor_name == "tech_obv_momentum":
|
||
return obv_momentum(close_series, volume_series, 20)
|
||
|
||
elif factor_name == "tech_pv_trend":
|
||
return price_volume_trend(close_series, volume_series, 20)
|
||
|
||
# 趋势跟踪因子
|
||
elif factor_name == "trend_ma_cross":
|
||
# 修复:返回均线交叉的比例而不是差值
|
||
ma_5 = rolling_mean(close_series, 5)
|
||
ma_20 = rolling_mean(close_series, 20)
|
||
if ma_20 is not None and ma_20 != 0:
|
||
return (ma_5 - ma_20) / ma_20 if ma_5 is not None else None
|
||
else:
|
||
return 0.0 if ma_5 is not None else None
|
||
|
||
elif factor_name == "trend_price_channel":
|
||
# 价格通道突破因子:当前价格相对于通道的位置
|
||
window = 20
|
||
high_channel = max(close_series[:window])
|
||
low_channel = min(close_series[:window])
|
||
if high_channel != low_channel:
|
||
return (close_series[0] - low_channel) / (high_channel - low_channel)
|
||
return 0.0
|
||
|
||
elif factor_name == "trend_adx":
|
||
# 修复:标准ADX计算实现(需要high/low序列)
|
||
# 注意:当前仅使用close序列作为high/low的近似,这是一个简化实现
|
||
window = 14
|
||
if len(close_series) < window + 1:
|
||
return None
|
||
|
||
# 计算TR、+DM、-DM序列
|
||
tr_series = []
|
||
plus_dm_series = []
|
||
minus_dm_series = []
|
||
|
||
for i in range(len(close_series) - 1):
|
||
# 使用close作为high/low的近似(简化实现)
|
||
today_high = close_series[i]
|
||
today_low = close_series[i]
|
||
prev_high = close_series[i + 1]
|
||
prev_low = close_series[i + 1]
|
||
prev_close = close_series[i + 1]
|
||
|
||
# 计算真实波幅(TR)
|
||
tr = max(
|
||
abs(today_high - today_low),
|
||
abs(today_high - prev_close),
|
||
abs(today_low - prev_close)
|
||
)
|
||
tr_series.append(tr)
|
||
|
||
# 计算方向运动
|
||
plus_dm = max(today_high - prev_high, 0)
|
||
minus_dm = max(prev_low - today_low, 0)
|
||
|
||
# 确保只有一项为正值
|
||
if plus_dm > minus_dm:
|
||
minus_dm = 0
|
||
elif minus_dm > plus_dm:
|
||
plus_dm = 0
|
||
else:
|
||
plus_dm = minus_dm = 0
|
||
|
||
plus_dm_series.append(plus_dm)
|
||
minus_dm_series.append(minus_dm)
|
||
|
||
if len(tr_series) < window:
|
||
return None
|
||
|
||
# 计算+DI和-DI(使用Wilder平滑方法)
|
||
plus_di_series = []
|
||
minus_di_series = []
|
||
|
||
# 初始化第一个值
|
||
tr_sum = sum(tr_series[:window])
|
||
plus_dm_sum = sum(plus_dm_series[:window])
|
||
minus_dm_sum = sum(minus_dm_series[:window])
|
||
|
||
if tr_sum > 0:
|
||
plus_di = 100 * (plus_dm_sum / tr_sum)
|
||
minus_di = 100 * (minus_dm_sum / tr_sum)
|
||
else:
|
||
plus_di = minus_di = 0
|
||
|
||
plus_di_series.append(plus_di)
|
||
minus_di_series.append(minus_di)
|
||
|
||
# 计算后续值(使用Wilder平滑)
|
||
for i in range(1, len(tr_series) - window + 1):
|
||
# Wilder平滑:当前值 = (前一个平滑值 * (n-1) + 当前值) / n
|
||
tr_sum = (tr_sum * (window - 1) + tr_series[i + window - 1]) / window
|
||
plus_dm_sum = (plus_dm_sum * (window - 1) + plus_dm_series[i + window - 1]) / window
|
||
minus_dm_sum = (minus_dm_sum * (window - 1) + minus_dm_series[i + window - 1]) / window
|
||
|
||
if tr_sum > 0:
|
||
plus_di = 100 * (plus_dm_sum / tr_sum)
|
||
minus_di = 100 * (minus_dm_sum / tr_sum)
|
||
else:
|
||
plus_di = minus_di = 0
|
||
|
||
plus_di_series.append(plus_di)
|
||
minus_di_series.append(minus_di)
|
||
|
||
# 计算DX序列
|
||
dx_series = []
|
||
for i in range(len(plus_di_series)):
|
||
plus_di_val = plus_di_series[i]
|
||
minus_di_val = minus_di_series[i]
|
||
|
||
if plus_di_val + minus_di_val > 0:
|
||
dx = 100 * (abs(plus_di_val - minus_di_val) / (plus_di_val + minus_di_val))
|
||
else:
|
||
dx = 0
|
||
dx_series.append(dx)
|
||
|
||
# 计算ADX(DX的移动平均)
|
||
if len(dx_series) < window:
|
||
return None
|
||
|
||
# ADX是DX的移动平均(使用简单移动平均)
|
||
adx = sum(dx_series[:window]) / window
|
||
|
||
# 确保在0-100范围内
|
||
return max(0, min(100, adx))
|
||
|
||
# 市场微观结构因子
|
||
elif factor_name == "micro_tick_direction":
|
||
# 简化的逐笔方向:基于最近价格变动
|
||
window = 5
|
||
if len(close_series) < window + 1:
|
||
return None
|
||
|
||
# 计算价格变动方向
|
||
directions = [1 if close_series[i] > close_series[i+1] else -1 for i in range(window)]
|
||
return sum(directions) / window
|
||
|
||
elif factor_name == "micro_trade_imbalance":
|
||
# 修复:交易失衡(按常规定义实现)
|
||
# 基于签名成交量的归一化差值
|
||
window = 10
|
||
if len(close_series) < window + 1 or len(volume_series) < window + 1:
|
||
return None
|
||
|
||
# 计算签名成交量:volume_t * sign(close_t - close_{t-1})
|
||
signed_volumes = []
|
||
total_volume = 0
|
||
|
||
for i in range(window):
|
||
if i + 1 < len(close_series):
|
||
# 计算价格变动符号
|
||
price_change = close_series[i] - close_series[i+1]
|
||
sign = 1 if price_change > 0 else (-1 if price_change < 0 else 0)
|
||
|
||
# 计算签名成交量
|
||
signed_volume = volume_series[i] * sign
|
||
signed_volumes.append(signed_volume)
|
||
total_volume += volume_series[i]
|
||
|
||
if total_volume == 0:
|
||
return 0.0
|
||
|
||
# 计算交易不平衡度:signed_vol / total_volume
|
||
signed_vol_sum = sum(signed_volumes)
|
||
imbalance = signed_vol_sum / (total_volume + 1e-8)
|
||
|
||
# 确保结果在[-1, 1]范围内
|
||
return max(-1.0, min(1.0, imbalance))
|
||
|
||
# 波动率预测因子
|
||
elif factor_name == "vol_garch":
|
||
return garch_volatility(close_series, 20)
|
||
|
||
elif factor_name == "vol_range_pred":
|
||
# 波动区间预测:基于历史价格区间
|
||
window = 10
|
||
if len(close_series) < window + 5:
|
||
return None
|
||
|
||
# 计算历史价格区间
|
||
ranges = []
|
||
for i in range(5): # 使用最近5个窗口
|
||
if i + window < len(close_series):
|
||
price_range = max(close_series[i:i+window]) - min(close_series[i:i+window])
|
||
ranges.append(price_range / close_series[i])
|
||
|
||
if ranges:
|
||
# 使用历史区间的75分位数作为预测
|
||
return np.percentile(ranges, 75)
|
||
return None
|
||
|
||
elif factor_name == "vol_regime":
|
||
return volatility_regime(close_series, volume_series, 20)
|
||
|
||
# 量价联合因子
|
||
elif factor_name == "volume_price_corr":
|
||
return volume_price_correlation(close_series, volume_series, 20)
|
||
|
||
elif factor_name == "volume_price_diverge":
|
||
# 量价背离:价格和成交量趋势的背离程度
|
||
window = 10
|
||
if len(close_series) < window or len(volume_series) < window:
|
||
return None
|
||
|
||
# 计算价格和成交量趋势
|
||
price_trend = sum(1 if close_series[i] > close_series[i+1] else -1 for i in range(window-1))
|
||
volume_trend = sum(1 if volume_series[i] > volume_series[i+1] else -1 for i in range(window-1))
|
||
|
||
# 计算背离程度
|
||
divergence = price_trend * volume_trend * -1 # 反向为背离
|
||
return np.clip(divergence / (window - 1), -1, 1)
|
||
|
||
elif factor_name == "volume_intensity":
|
||
# 成交强度:基于成交量和价格变动的加权指标
|
||
window = 5
|
||
if len(close_series) < window + 1 or len(volume_series) < window + 1:
|
||
return None
|
||
|
||
# 计算价格变动
|
||
price_changes = [abs(close_series[i] - close_series[i+1]) for i in range(window)]
|
||
|
||
# 计算成交量加权的价格变动
|
||
weighted_changes = sum(price_changes[i] * volume_series[i] for i in range(window))
|
||
total_volume = sum(volume_series[:window])
|
||
|
||
if total_volume > 0:
|
||
intensity = weighted_changes / (total_volume * np.mean(close_series[:window]) + 1e-8)
|
||
return np.clip(intensity * 100, -100, 100) # 归一化到合理范围
|
||
return None
|
||
|
||
# 增强动量因子
|
||
elif factor_name == "momentum_adaptive":
|
||
return adaptive_momentum(close_series, volume_series, 20)
|
||
|
||
elif factor_name == "momentum_regime":
|
||
return momentum_regime(close_series, volume_series, 20)
|
||
|
||
elif factor_name == "momentum_quality":
|
||
return momentum_quality(close_series, 20)
|
||
|
||
# 均线比率因子
|
||
elif factor_name.endswith("_ratio"):
|
||
if "price_ma" in factor_name:
|
||
window = int(factor_name.split("_")[2])
|
||
ma = rolling_mean(close_series, window)
|
||
return close_series[0] / ma if ma > 0 else None
|
||
|
||
elif "volume_ma" in factor_name:
|
||
window = int(factor_name.split("_")[2])
|
||
ma = rolling_mean(volume_series, window)
|
||
return volume_series[0] / ma if ma > 0 else None
|
||
|
||
# 估值因子
|
||
elif factor_name == "val_ps_score":
|
||
# PS估值分数:基于PS比率的估值指标
|
||
# 假设PS比率越低,估值越有吸引力
|
||
if len(close_series) < 10:
|
||
return None
|
||
|
||
# 简化的PS估值:基于价格与历史均值的比较
|
||
current_price = close_series[0]
|
||
avg_price = np.mean(close_series[:10])
|
||
|
||
if avg_price > 0:
|
||
# 当前价格相对于历史均值的偏离程度
|
||
ps_ratio = current_price / avg_price
|
||
# 标准化到[-1, 1]区间
|
||
return np.clip((1.0 - ps_ratio) / 2.0, -1, 1)
|
||
return None
|
||
|
||
elif factor_name == "val_multiscore":
|
||
# 多维估值分数:综合多个估值维度的评分
|
||
if len(close_series) < 20:
|
||
return None
|
||
|
||
# 使用价格动量、波动率和相对强度作为估值代理
|
||
momentum_5 = momentum(close_series, 5)
|
||
momentum_20 = momentum(close_series, 20)
|
||
|
||
# 计算波动率(手动实现,避免依赖外部函数)
|
||
if len(close_series) >= 20:
|
||
returns = [close_series[i] / close_series[i+1] - 1 for i in range(19)] # 修正索引范围
|
||
volatility_20 = np.std(returns) if returns else 0
|
||
else:
|
||
volatility_20 = 0
|
||
|
||
# 综合评分:动量正向,波动率负向
|
||
if volatility_20 > 0:
|
||
score = (momentum_5 + momentum_20) / (2 * volatility_20)
|
||
return np.clip(score, -1, 1)
|
||
return None
|
||
|
||
elif factor_name == "val_dividend_score":
|
||
# 股息评分:基于价格稳定性和趋势的股息吸引力评分
|
||
if len(close_series) < 20:
|
||
return None
|
||
|
||
# 计算价格稳定性(低波动率)
|
||
if len(close_series) >= 20:
|
||
returns = [close_series[i] / close_series[i+1] - 1 for i in range(19)] # 修正索引范围
|
||
vol = np.std(returns) if returns else 0
|
||
else:
|
||
vol = 0
|
||
|
||
# 计算趋势强度
|
||
trend = momentum(close_series, 20)
|
||
|
||
# 股息吸引力:稳定性正向,趋势正向
|
||
stability_score = 1.0 - np.clip(vol, 0, 1)
|
||
trend_score = np.clip(trend, -1, 1)
|
||
|
||
return (stability_score + trend_score) / 2.0
|
||
|
||
# 市场状态因子
|
||
elif factor_name == "market_regime":
|
||
# 市场状态:基于价格和成交量的市场状态判断
|
||
if len(close_series) < 20 or len(volume_series) < 20:
|
||
return None
|
||
|
||
# 价格趋势
|
||
price_trend = momentum(close_series, 20)
|
||
# 成交量趋势
|
||
volume_trend = momentum(volume_series, 20)
|
||
|
||
# 市场状态:牛市(价格↑成交量↑)、熊市(价格↓成交量↓)、
|
||
# 震荡市(价格平稳成交量平稳)、背离市(价格成交量反向)
|
||
regime_score = price_trend * volume_trend
|
||
return np.clip(regime_score, -1, 1)
|
||
|
||
elif factor_name == "trend_strength":
|
||
# 趋势强度:基于价格变动的趋势强度度量
|
||
if len(close_series) < 20:
|
||
return None
|
||
|
||
# 计算不同时间窗口的动量
|
||
momentum_5 = momentum(close_series, 5)
|
||
momentum_10 = momentum(close_series, 10)
|
||
momentum_20 = momentum(close_series, 20)
|
||
|
||
# 趋势强度:短期、中期、长期动量的一致性
|
||
trend_strength = (momentum_5 + momentum_10 + momentum_20) / 3.0
|
||
return np.clip(trend_strength, -1, 1)
|
||
|
||
# 风险因子
|
||
elif factor_name == "risk_penalty":
|
||
# 风险惩罚因子:基于波动率和异常价格的综合风险度量
|
||
if len(close_series) < 20:
|
||
return None
|
||
|
||
# 波动率风险
|
||
if len(close_series) >= 20:
|
||
returns = [close_series[i] / close_series[i+1] - 1 for i in range(19)] # 修正索引范围
|
||
vol_risk = np.std(returns) if returns else 0
|
||
else:
|
||
vol_risk = 0
|
||
|
||
# 价格异常风险(相对于均值的偏离)
|
||
avg_price = np.mean(close_series[:20])
|
||
if avg_price > 0:
|
||
price_deviation = abs(close_series[0] / avg_price - 1.0)
|
||
else:
|
||
price_deviation = 0
|
||
|
||
# 综合风险评分
|
||
risk_score = (vol_risk + price_deviation) / 2.0
|
||
return np.clip(risk_score, 0, 1) # 风险因子范围为[0, 1]
|
||
|
||
raise ValueError(f"因子 {factor_name} 没有对应的计算实现")
|
||
|
||
def compute_all_factors(self,
|
||
close_series: List[float],
|
||
volume_series: List[float],
|
||
ts_code: str,
|
||
trade_date: str) -> Dict[str, float | None]:
|
||
"""计算所有扩展因子
|
||
|
||
Args:
|
||
close_series: 收盘价序列
|
||
volume_series: 成交量序列
|
||
ts_code: 股票代码
|
||
trade_date: 交易日期
|
||
|
||
Returns:
|
||
因子名称到因子值的映射
|
||
"""
|
||
results = {}
|
||
|
||
for factor_spec in EXTENDED_FACTORS:
|
||
try:
|
||
factor_name = factor_spec.name
|
||
factor_value = self.compute_factor(factor_name, close_series, volume_series)
|
||
|
||
# 验证因子值
|
||
if factor_value is not None:
|
||
# 使用真实的 ts_code 和 trade_date 进行验证
|
||
validate_factor_value(factor_name, factor_value, ts_code, trade_date)
|
||
|
||
results[factor_name] = factor_value
|
||
|
||
except Exception as e:
|
||
LOGGER.debug(
|
||
"因子计算失败 factor=%s ts_code=%s date=%s err=%s",
|
||
factor_spec.name,
|
||
ts_code,
|
||
trade_date,
|
||
str(e),
|
||
extra=LOG_EXTRA,
|
||
)
|
||
results[factor_spec.name] = None
|
||
|
||
return results
|
||
|
||
def normalize_factors(self,
|
||
factors: Dict[str, float],
|
||
clip_threshold: float = 3.0) -> Dict[str, float]:
|
||
"""标准化因子值到[-1,1]区间
|
||
|
||
Args:
|
||
factors: 原始因子值字典
|
||
clip_threshold: float, 标准化时的截断阈值,默认为3.0
|
||
|
||
Returns:
|
||
Dict[str, float]: 标准化后的因子值字典,
|
||
只包含成功标准化的因子值
|
||
|
||
Note:
|
||
标准化过程包括:
|
||
1. Z-score标准化
|
||
2. 使用tanh压缩到[-1,1]区间
|
||
3. 异常值处理(截断)
|
||
"""
|
||
results = {}
|
||
success_count = 0
|
||
|
||
for name, value in factors.items():
|
||
if value is not None:
|
||
try:
|
||
normalized = normalize(value, clip_threshold)
|
||
if not np.isnan(normalized):
|
||
results[name] = normalized
|
||
success_count += 1
|
||
except Exception as e:
|
||
LOGGER.warning(
|
||
"因子标准化失败 name=%s error=%s",
|
||
name,
|
||
str(e),
|
||
extra=LOG_EXTRA
|
||
)
|
||
|
||
LOGGER.info(
|
||
"因子标准化完成 total=%d success=%d failed=%d",
|
||
len(factors),
|
||
success_count,
|
||
len(factors) - success_count,
|
||
extra=LOG_EXTRA
|
||
)
|
||
|
||
return results |