llm-quant/app/ingest/rss.py
2025-09-26 18:21:25 +08:00

43 lines
934 B
Python

"""RSS ingestion for news and heat scores."""
from __future__ import annotations
from dataclasses import dataclass
from datetime import datetime
from typing import Iterable, List
@dataclass
class RssItem:
    """A single entry taken from an RSS feed.

    Plain value object: the dataclass decorator supplies ``__init__``,
    ``__repr__`` and field-wise ``__eq__``. Fields are positional in the
    order declared below.
    """

    # Identifier of the entry; used first when fingerprinting duplicates.
    id: str
    # Headline text of the entry.
    title: str
    # URL of the entry; used as the fingerprint when ``id`` is empty.
    link: str
    # Publication timestamp.
    # NOTE(review): naive vs. timezone-aware is not established here - confirm.
    published: datetime
    # Summary / description text of the entry.
    summary: str
    # Origin of the entry (presumably the feed name or URL - verify against
    # the code that constructs these items).
    source: str
def fetch_rss_feed(url: str) -> List[RssItem]:
    """Download and parse an RSS feed into structured items.

    This is currently a placeholder: no download or parsing is performed
    and every call raises.

    Args:
        url: Location of the RSS feed to fetch.

    Returns:
        The parsed feed entries (once implemented).

    Raises:
        NotImplementedError: Always, until the fetcher is implemented.
    """
    raise NotImplementedError("fetch_rss_feed is not implemented yet")
def deduplicate_items(items: Iterable[RssItem]) -> List[RssItem]:
    """Drop duplicate stories by link/id fingerprint.

    The fingerprint of an item is its ``id`` or, when that is empty, its
    ``link``. The first item seen for each fingerprint is kept and later
    items with the same fingerprint are dropped; input order is preserved.

    Items with neither an ``id`` nor a ``link`` cannot be fingerprinted.
    They are always kept rather than being collapsed into a single entry,
    because nothing indicates that they are the same story.

    Args:
        items: Feed items to filter; any iterable, consumed once.

    Returns:
        A new list containing the unique items in their original order.
    """
    seen = set()
    unique: List[RssItem] = []
    for item in items:
        key = item.id or item.link
        if not key:
            # No usable fingerprint: treating the empty key as a real one
            # would silently discard every such item after the first.
            unique.append(item)
            continue
        if key in seen:
            continue
        seen.add(key)
        unique.append(item)
    return unique
def save_news_items(items: Iterable[RssItem]) -> None:
    """Persist RSS items into the `news` table.

    This is currently a placeholder: nothing is written and every call
    raises.

    Args:
        items: Feed items to store.

    Raises:
        NotImplementedError: Always, until persistence is implemented.
    """
    raise NotImplementedError("save_news_items is not implemented yet")