From 44adc836faa42a1c4a51e009e631e2f08ba07470 Mon Sep 17 00:00:00 2001 From: Your Name Date: Thu, 9 Oct 2025 20:59:27 +0800 Subject: [PATCH] add zero window factor validation and improve factor existence checking --- README.md | 8 +++++ app/features/evaluation.py | 4 ++- app/features/factors.py | 74 ++++++++++++++++++++++++++++++++------ app/features/validation.py | 49 +++++++++++++++++++++++++ 4 files changed, 124 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 6e508c4..0e87572 100644 --- a/README.md +++ b/README.md @@ -145,3 +145,11 @@ Streamlit `自检测试` 页签提供: 5. **测试与验证**(待补充) - 需完善部门上下文构造与多模型调用的单元/集成测试,结合回测指标对比多 LLM 策略收益差异。 + + + +TODO +1. 在选股时,因子都已经提前算好,不需要再计算了,直接用就行。 +2. 因子计算的公式再确认下 +3. 审查整个项目的代码逻辑,从main.py开始,逐字逐句检查。如一些重复的检查可以去掉;未实现的功能请标记TODO,并给出实现思路;错误的、低效率的调用请修正;代码结构性的问题请指出。 +4. 梳理整个项目的所有业务逻辑。针对每个业务,从业务实现角度评估代码功能是否存在问题,是否需要优化,是否需要重构。 diff --git a/app/features/evaluation.py b/app/features/evaluation.py index 0a46121..5eea557 100644 --- a/app/features/evaluation.py +++ b/app/features/evaluation.py @@ -91,11 +91,13 @@ def evaluate_factor( try: # 计算因子值 + # 设置 skip_existing=False,确保即使因子已存在也会重新计算 factor_results = compute_factor_range( start_date, end_date, factors=[FactorSpec(factor_name, 0)], - ts_codes=universe + ts_codes=universe, + skip_existing=False ) # 因子计算完成(在异步线程中不直接访问factor_progress) diff --git a/app/features/factors.py b/app/features/factors.py index a340fbd..4669e0b 100644 --- a/app/features/factors.py +++ b/app/features/factors.py @@ -17,7 +17,7 @@ from app.features.extended_factors import ExtendedFactors from app.features.sentiment_factors import SentimentFactors from app.features.value_risk_factors import ValueRiskFactors # 导入因子验证功能 -from app.features.validation import check_data_sufficiency, detect_outliers +from app.features.validation import check_data_sufficiency, check_data_sufficiency_for_zero_window, detect_outliers # 导入UI进度状态管理 from app.ui.progress_state import factor_progress @@ -132,11 +132,13 @@ def compute_factors( return [] if skip_existing: - existing = _existing_factor_codes(trade_date_str) + # 检查所有因子名称 + factor_names = [spec.name for spec in specs] + existing = _existing_factor_codes_with_factors(trade_date_str, factor_names) universe = [code for code in universe if code not in existing] if not universe: LOGGER.debug( - "目标交易日因子已存在 trade_date=%s universe_size=%s", + "目标交易日所有因子已存在 trade_date=%s universe_size=%s", trade_date_str, len(existing), extra=LOG_EXTRA, @@ -289,6 +291,45 @@ def _existing_factor_codes(trade_date: str) -> set[str]: return {row["ts_code"] for row in rows if row["ts_code"]} +def _existing_factor_codes_with_factors(trade_date: str, factor_names: List[str]) -> Dict[str, bool]: + """检查特定日期和因子的数据是否存在 + + Args: + trade_date: 交易日期 + factor_names: 因子名称列表 + + Returns: + 字典,键为股票代码,值为是否存在所有因子 + """ + if not factor_names: + return {} + + # 构建检查条件 + conditions = [] + for name in factor_names: + conditions.append(f"json_extract(factors, '$.{name}') IS NOT NULL") + condition_str = " AND ".join(conditions) + + # 构建SQL查询 + query = """ + SELECT ts_code + FROM factors + WHERE trade_date = ? + AND """ + condition_str + """ + GROUP BY ts_code + """ + + with db_session(read_only=True) as conn: + rows = conn.execute(query, (trade_date,)).fetchall() + + # 返回结果 + result = {} + for row in rows: + result[row["ts_code"]] = True + + return result + + def _list_trade_dates( start_date: str, end_date: str, @@ -603,13 +644,26 @@ def _compute_security_factors( ) # 数据有效性检查 - if not check_data_sufficiency(ts_code, trade_date): - LOGGER.debug( - "数据不满足计算条件 ts_code=%s date=%s", - ts_code, trade_date, - extra=LOG_EXTRA - ) - return {} + # 检查是否有窗口为0的因子 + has_zero_window = any(spec.window == 0 for spec in specs) + + # 如果有窗口为0的因子,使用专门的数据检查函数 + if has_zero_window: + if not check_data_sufficiency_for_zero_window(ts_code, trade_date): + LOGGER.debug( + "数据不满足计算条件(窗口为0) ts_code=%s date=%s", + ts_code, trade_date, + extra=LOG_EXTRA + ) + return {} + else: + if not check_data_sufficiency(ts_code, trade_date): + LOGGER.debug( + "数据不满足计算条件 ts_code=%s date=%s", + ts_code, trade_date, + extra=LOG_EXTRA + ) + return {} turnover_series = _fetch_series_values( broker, diff --git a/app/features/validation.py b/app/features/validation.py index 7815f23..f2ebca7 100644 --- a/app/features/validation.py +++ b/app/features/validation.py @@ -189,6 +189,55 @@ def detect_outliers( return result +def check_data_sufficiency_for_zero_window( + ts_code: str, + trade_date: str +) -> bool: + """验证窗口为0的因子所需数据是否充分。 + + Args: + ts_code: 股票代码 + trade_date: 交易日期 + + Returns: + 数据是否充分 + """ + from app.utils.data_access import DataBroker + + broker = DataBroker() + + # 记录检查开始 + LOGGER.debug( + "开始检查窗口为0的因子数据充分性 ts_code=%s date=%s", + ts_code, trade_date, + extra=LOG_EXTRA + ) + + # 检查日期点数据完整性 + latest_fields = broker.fetch_latest( + ts_code, + trade_date, + ["daily.close", "daily_basic.turnover_rate", "daily_basic.pe", "daily_basic.pb"] + ) + required_fields = {"daily.close"} + + for field in required_fields: + if latest_fields.get(field) is None: + LOGGER.warning( + "缺少必需字段 field=%s ts_code=%s date=%s", + field, ts_code, trade_date, + extra=LOG_EXTRA + ) + return False + + LOGGER.debug( + "窗口为0的因子数据充分性检查通过 ts_code=%s", + ts_code, + extra=LOG_EXTRA + ) + return True + + def check_data_sufficiency( ts_code: str, trade_date: str,