add zero window factor validation and improve factor existence checking

This commit is contained in:
Your Name 2025-10-09 20:59:27 +08:00
parent 8cc64cd0c0
commit 44adc836fa
4 changed files with 124 additions and 11 deletions

View File

@ -145,3 +145,11 @@ Streamlit `自检测试` 页签提供:
5. **测试与验证**(待补充) 5. **测试与验证**(待补充)
- 需完善部门上下文构造与多模型调用的单元/集成测试,结合回测指标对比多 LLM 策略收益差异。 - 需完善部门上下文构造与多模型调用的单元/集成测试,结合回测指标对比多 LLM 策略收益差异。
TODO
1. 在选股时,因子都已经提前算好,不需要再计算了,直接用就行。
2. 因子计算的公式再确认下
3. 审查整个项目的代码逻辑,从 main.py 开始逐字逐句检查。例如:重复的检查可以去掉;未实现的功能请标记 TODO 并给出实现思路;错误的、低效率的调用请修正;代码结构性的问题请指出。
4. 梳理整个项目的所有业务逻辑。针对每个业务,从业务实现角度评估代码功能是否存在问题,是否需要优化,是否需要重构。

View File

@ -91,11 +91,13 @@ def evaluate_factor(
try: try:
# 计算因子值 # 计算因子值
# 设置 skip_existing=False确保即使因子已存在也会重新计算
factor_results = compute_factor_range( factor_results = compute_factor_range(
start_date, start_date,
end_date, end_date,
factors=[FactorSpec(factor_name, 0)], factors=[FactorSpec(factor_name, 0)],
ts_codes=universe ts_codes=universe,
skip_existing=False
) )
# 因子计算完成在异步线程中不直接访问factor_progress # 因子计算完成在异步线程中不直接访问factor_progress

View File

@ -17,7 +17,7 @@ from app.features.extended_factors import ExtendedFactors
from app.features.sentiment_factors import SentimentFactors from app.features.sentiment_factors import SentimentFactors
from app.features.value_risk_factors import ValueRiskFactors from app.features.value_risk_factors import ValueRiskFactors
# 导入因子验证功能 # 导入因子验证功能
from app.features.validation import check_data_sufficiency, detect_outliers from app.features.validation import check_data_sufficiency, check_data_sufficiency_for_zero_window, detect_outliers
# 导入UI进度状态管理 # 导入UI进度状态管理
from app.ui.progress_state import factor_progress from app.ui.progress_state import factor_progress
@ -132,11 +132,13 @@ def compute_factors(
return [] return []
if skip_existing: if skip_existing:
existing = _existing_factor_codes(trade_date_str) # 检查所有因子名称
factor_names = [spec.name for spec in specs]
existing = _existing_factor_codes_with_factors(trade_date_str, factor_names)
universe = [code for code in universe if code not in existing] universe = [code for code in universe if code not in existing]
if not universe: if not universe:
LOGGER.debug( LOGGER.debug(
"目标交易日因子已存在 trade_date=%s universe_size=%s", "目标交易日所有因子已存在 trade_date=%s universe_size=%s",
trade_date_str, trade_date_str,
len(existing), len(existing),
extra=LOG_EXTRA, extra=LOG_EXTRA,
@ -289,6 +291,45 @@ def _existing_factor_codes(trade_date: str) -> set[str]:
return {row["ts_code"] for row in rows if row["ts_code"]} return {row["ts_code"] for row in rows if row["ts_code"]}
def _existing_factor_codes_with_factors(trade_date: str, factor_names: List[str]) -> Dict[str, bool]:
"""检查特定日期和因子的数据是否存在
Args:
trade_date: 交易日期
factor_names: 因子名称列表
Returns:
字典键为股票代码值为是否存在所有因子
"""
if not factor_names:
return {}
# 构建检查条件
conditions = []
for name in factor_names:
conditions.append(f"json_extract(factors, '$.{name}') IS NOT NULL")
condition_str = " AND ".join(conditions)
# 构建SQL查询
query = """
SELECT ts_code
FROM factors
WHERE trade_date = ?
AND """ + condition_str + """
GROUP BY ts_code
"""
with db_session(read_only=True) as conn:
rows = conn.execute(query, (trade_date,)).fetchall()
# 返回结果
result = {}
for row in rows:
result[row["ts_code"]] = True
return result
def _list_trade_dates( def _list_trade_dates(
start_date: str, start_date: str,
end_date: str, end_date: str,
@ -603,13 +644,26 @@ def _compute_security_factors(
) )
# 数据有效性检查 # 数据有效性检查
if not check_data_sufficiency(ts_code, trade_date): # 检查是否有窗口为0的因子
LOGGER.debug( has_zero_window = any(spec.window == 0 for spec in specs)
"数据不满足计算条件 ts_code=%s date=%s",
ts_code, trade_date, # 如果有窗口为0的因子使用专门的数据检查函数
extra=LOG_EXTRA if has_zero_window:
) if not check_data_sufficiency_for_zero_window(ts_code, trade_date):
return {} LOGGER.debug(
"数据不满足计算条件(窗口为0) ts_code=%s date=%s",
ts_code, trade_date,
extra=LOG_EXTRA
)
return {}
else:
if not check_data_sufficiency(ts_code, trade_date):
LOGGER.debug(
"数据不满足计算条件 ts_code=%s date=%s",
ts_code, trade_date,
extra=LOG_EXTRA
)
return {}
turnover_series = _fetch_series_values( turnover_series = _fetch_series_values(
broker, broker,

View File

@ -189,6 +189,55 @@ def detect_outliers(
return result return result
def check_data_sufficiency_for_zero_window(
    ts_code: str,
    trade_date: str
) -> bool:
    """Check whether the data needed by zero-window factors is available.

    Args:
        ts_code: Security code.
        trade_date: Trade date.

    Returns:
        ``True`` when every mandatory field has a value in the result of
        ``DataBroker.fetch_latest``; ``False`` as soon as one is missing.
    """
    from app.utils.data_access import DataBroker

    data_broker = DataBroker()

    # Log the start of the check.
    LOGGER.debug(
        "开始检查窗口为0的因子数据充分性 ts_code=%s date=%s",
        ts_code, trade_date,
        extra=LOG_EXTRA
    )

    # Fetch the point-in-time values; only the mandatory ones are validated.
    requested = ["daily.close", "daily_basic.turnover_rate", "daily_basic.pe", "daily_basic.pb"]
    snapshot = data_broker.fetch_latest(ts_code, trade_date, requested)

    mandatory = ("daily.close",)
    missing = next(
        (name for name in mandatory if snapshot.get(name) is None),
        None,
    )
    if missing is not None:
        LOGGER.warning(
            "缺少必需字段 field=%s ts_code=%s date=%s",
            missing, ts_code, trade_date,
            extra=LOG_EXTRA
        )
        return False

    LOGGER.debug(
        "窗口为0的因子数据充分性检查通过 ts_code=%s",
        ts_code,
        extra=LOG_EXTRA
    )
    return True
def check_data_sufficiency( def check_data_sufficiency(
ts_code: str, ts_code: str,
trade_date: str, trade_date: str,