add zero window factor validation and improve factor existence checking
This commit is contained in:
parent
8cc64cd0c0
commit
44adc836fa
@ -145,3 +145,11 @@ Streamlit `自检测试` 页签提供:
|
||||
|
||||
5. **测试与验证**(待补充)
|
||||
- 需完善部门上下文构造与多模型调用的单元/集成测试,结合回测指标对比多 LLM 策略收益差异。
|
||||
|
||||
|
||||
|
||||
TODO
|
||||
1. 在选股时,因子都已经提前算好,不需要再计算了,直接用就行。
|
||||
2. 再次核对各因子的计算公式是否正确。
|
||||
3. 审查整个项目的代码逻辑,从main.py开始,逐字逐句检查。如一些重复的检查可以去掉;未实现的功能请标记TODO,并给出实现思路;错误的、低效率的调用请修正;代码结构性的问题请指出。
|
||||
4. 梳理整个项目的所有业务逻辑。针对每个业务,从业务实现角度评估代码功能是否存在问题,是否需要优化,是否需要重构。
|
||||
|
||||
@ -91,11 +91,13 @@ def evaluate_factor(
|
||||
|
||||
try:
|
||||
# 计算因子值
|
||||
# 设置 skip_existing=False,确保即使因子已存在也会重新计算
|
||||
factor_results = compute_factor_range(
|
||||
start_date,
|
||||
end_date,
|
||||
factors=[FactorSpec(factor_name, 0)],
|
||||
ts_codes=universe
|
||||
ts_codes=universe,
|
||||
skip_existing=False
|
||||
)
|
||||
|
||||
# 因子计算完成(在异步线程中不直接访问factor_progress)
|
||||
|
||||
@ -17,7 +17,7 @@ from app.features.extended_factors import ExtendedFactors
|
||||
from app.features.sentiment_factors import SentimentFactors
|
||||
from app.features.value_risk_factors import ValueRiskFactors
|
||||
# 导入因子验证功能
|
||||
from app.features.validation import check_data_sufficiency, detect_outliers
|
||||
from app.features.validation import check_data_sufficiency, check_data_sufficiency_for_zero_window, detect_outliers
|
||||
# 导入UI进度状态管理
|
||||
from app.ui.progress_state import factor_progress
|
||||
|
||||
@ -132,11 +132,13 @@ def compute_factors(
|
||||
return []
|
||||
|
||||
if skip_existing:
|
||||
existing = _existing_factor_codes(trade_date_str)
|
||||
# 检查所有因子名称
|
||||
factor_names = [spec.name for spec in specs]
|
||||
existing = _existing_factor_codes_with_factors(trade_date_str, factor_names)
|
||||
universe = [code for code in universe if code not in existing]
|
||||
if not universe:
|
||||
LOGGER.debug(
|
||||
"目标交易日因子已存在 trade_date=%s universe_size=%s",
|
||||
"目标交易日所有因子已存在 trade_date=%s universe_size=%s",
|
||||
trade_date_str,
|
||||
len(existing),
|
||||
extra=LOG_EXTRA,
|
||||
@ -289,6 +291,45 @@ def _existing_factor_codes(trade_date: str) -> set[str]:
|
||||
return {row["ts_code"] for row in rows if row["ts_code"]}
|
||||
|
||||
|
||||
def _existing_factor_codes_with_factors(trade_date: str, factor_names: List[str]) -> Dict[str, bool]:
|
||||
"""检查特定日期和因子的数据是否存在
|
||||
|
||||
Args:
|
||||
trade_date: 交易日期
|
||||
factor_names: 因子名称列表
|
||||
|
||||
Returns:
|
||||
字典,键为股票代码,值为是否存在所有因子
|
||||
"""
|
||||
if not factor_names:
|
||||
return {}
|
||||
|
||||
# 构建检查条件
|
||||
conditions = []
|
||||
for name in factor_names:
|
||||
conditions.append(f"json_extract(factors, '$.{name}') IS NOT NULL")
|
||||
condition_str = " AND ".join(conditions)
|
||||
|
||||
# 构建SQL查询
|
||||
query = """
|
||||
SELECT ts_code
|
||||
FROM factors
|
||||
WHERE trade_date = ?
|
||||
AND """ + condition_str + """
|
||||
GROUP BY ts_code
|
||||
"""
|
||||
|
||||
with db_session(read_only=True) as conn:
|
||||
rows = conn.execute(query, (trade_date,)).fetchall()
|
||||
|
||||
# 返回结果
|
||||
result = {}
|
||||
for row in rows:
|
||||
result[row["ts_code"]] = True
|
||||
|
||||
return result
|
||||
|
||||
|
||||
def _list_trade_dates(
|
||||
start_date: str,
|
||||
end_date: str,
|
||||
@ -603,6 +644,19 @@ def _compute_security_factors(
|
||||
)
|
||||
|
||||
# 数据有效性检查
|
||||
# 检查是否有窗口为0的因子
|
||||
has_zero_window = any(spec.window == 0 for spec in specs)
|
||||
|
||||
# 如果有窗口为0的因子,使用专门的数据检查函数
|
||||
if has_zero_window:
|
||||
if not check_data_sufficiency_for_zero_window(ts_code, trade_date):
|
||||
LOGGER.debug(
|
||||
"数据不满足计算条件(窗口为0) ts_code=%s date=%s",
|
||||
ts_code, trade_date,
|
||||
extra=LOG_EXTRA
|
||||
)
|
||||
return {}
|
||||
else:
|
||||
if not check_data_sufficiency(ts_code, trade_date):
|
||||
LOGGER.debug(
|
||||
"数据不满足计算条件 ts_code=%s date=%s",
|
||||
|
||||
@ -189,6 +189,55 @@ def detect_outliers(
|
||||
|
||||
return result
|
||||
|
||||
def check_data_sufficiency_for_zero_window(
    ts_code: str,
    trade_date: str
) -> bool:
    """Check whether the data needed by zero-window factors is available.

    Calls ``DataBroker.fetch_latest`` for a few daily fields and then requires
    only ``daily.close`` to be non-None in the returned mapping.

    Args:
        ts_code: Security code.
        trade_date: Trade date to check.

    Returns:
        True when every required field is present; False otherwise, after
        logging a warning that names the missing field.
    """
    # Function-scope import kept from the original; presumably it avoids a
    # circular import at module load time — TODO confirm.
    from app.utils.data_access import DataBroker

    data_broker = DataBroker()

    LOGGER.debug(
        "开始检查窗口为0的因子数据充分性 ts_code=%s date=%s",
        ts_code, trade_date,
        extra=LOG_EXTRA
    )

    # More fields are requested than are required; only the mandatory ones
    # below decide the outcome.
    requested = [
        "daily.close",
        "daily_basic.turnover_rate",
        "daily_basic.pe",
        "daily_basic.pb",
    ]
    snapshot = data_broker.fetch_latest(ts_code, trade_date, requested)

    mandatory = ("daily.close",)
    missing = next(
        (name for name in mandatory if snapshot.get(name) is None),
        None,
    )
    if missing is not None:
        LOGGER.warning(
            "缺少必需字段 field=%s ts_code=%s date=%s",
            missing, ts_code, trade_date,
            extra=LOG_EXTRA
        )
        return False

    LOGGER.debug(
        "窗口为0的因子数据充分性检查通过 ts_code=%s",
        ts_code,
        extra=LOG_EXTRA
    )
    return True
|
||||
|
||||
|
||||
def check_data_sufficiency(
|
||||
ts_code: str,
|
||||
trade_date: str,
|
||||
|
||||
Loading…
Reference in New Issue
Block a user