feat(signals): as-of-date aware cluster detection, open-market-only filter

- cluster_detector: pass as_of_date through to DB query so historical signal
  reprocessing doesn't look into the future
- filter_engine: accept as_of_date; skip non-open-market tx_codes (keep only "P" or an empty code);
  reject placeholder tickers (NONE, N/A, NA); propagate as_of_date to cluster detection

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Dominik Moritz Roth 2026-05-26 17:48:59 +02:00
parent b5268f063e
commit 727ad7cd6d
2 changed files with 13 additions and 5 deletions

View File

@ -1,9 +1,11 @@
from typing import Optional
from db.db import get_recent_buys_for_ticker from db.db import get_recent_buys_for_ticker
import config import config
def detect_cluster(ticker: str) -> dict: def detect_cluster(ticker: str, as_of_date: Optional[str] = None) -> dict:
buys = get_recent_buys_for_ticker(ticker, config.CLUSTER_WINDOW_DAYS) buys = get_recent_buys_for_ticker(ticker, config.CLUSTER_WINDOW_DAYS, as_of_date=as_of_date)
unique_insiders = {b["insider_name"] for b in buys} unique_insiders = {b["insider_name"] for b in buys}
total_value = sum(b["total_value"] or 0 for b in buys) total_value = sum(b["total_value"] or 0 for b in buys)
return { return {

View File

@ -25,10 +25,16 @@ def _score(total_value: float, role: str, cluster_size: int) -> float:
return base * math.log(total_value) * cluster_mult return base * math.log(total_value) * cluster_mult
def process_filing(filing: dict) -> Optional[dict]: def process_filing(filing: dict, as_of_date: Optional[str] = None) -> Optional[dict]:
if filing.get("flag") != "A": if filing.get("flag") != "A":
return None return None
# Only open market purchases (P) or non-coded buys; exclude option exercises (X), grants (A), etc.
tx_code = filing.get("tx_code", "")
if tx_code and tx_code not in ("P", ""):
logger.debug(f"Skipping non-open-market tx_code={tx_code}: {filing['accession_number']}")
return None
if filing.get("is_10b51"): if filing.get("is_10b51"):
logger.debug(f"Skipping 10b5-1 filing: {filing['accession_number']}") logger.debug(f"Skipping 10b5-1 filing: {filing['accession_number']}")
return None return None
@ -39,10 +45,10 @@ def process_filing(filing: dict) -> Optional[dict]:
return None return None
ticker = filing.get("ticker", "") ticker = filing.get("ticker", "")
if not ticker: if not ticker or ticker.upper() in ("NONE", "N/A", "NA"):
return None return None
cluster_info = detect_cluster(ticker) cluster_info = detect_cluster(ticker, as_of_date=as_of_date)
cluster_size = cluster_info["cluster_size"] cluster_size = cluster_info["cluster_size"]
total_cluster_value = cluster_info["total_cluster_value"] total_cluster_value = cluster_info["total_cluster_value"]