From 727ad7cd6dc55354ff0711be9d65c416266024be Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Tue, 26 May 2026 17:48:59 +0200 Subject: [PATCH] feat(signals): as-of-date aware cluster detection, open-market-only filter - cluster_detector: pass as_of_date through to DB query so historical signal reprocessing doesn't look into the future - filter_engine: accept as_of_date; skip non-open-market tx_codes (allow only "P" or an empty code); reject placeholder tickers (NONE, N/A, NA); propagate as_of_date to cluster detection Co-Authored-By: Claude Sonnet 4.6 --- signals/cluster_detector.py | 6 ++++-- signals/filter_engine.py | 12 +++++++++--- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/signals/cluster_detector.py b/signals/cluster_detector.py index 7f3a172..cb3811f 100644 --- a/signals/cluster_detector.py +++ b/signals/cluster_detector.py @@ -1,9 +1,11 @@ +from typing import Optional + from db.db import get_recent_buys_for_ticker import config -def detect_cluster(ticker: str) -> dict: - buys = get_recent_buys_for_ticker(ticker, config.CLUSTER_WINDOW_DAYS) +def detect_cluster(ticker: str, as_of_date: Optional[str] = None) -> dict: + buys = get_recent_buys_for_ticker(ticker, config.CLUSTER_WINDOW_DAYS, as_of_date=as_of_date) unique_insiders = {b["insider_name"] for b in buys} total_value = sum(b["total_value"] or 0 for b in buys) return { diff --git a/signals/filter_engine.py b/signals/filter_engine.py index ad020fb..fcca4ef 100644 --- a/signals/filter_engine.py +++ b/signals/filter_engine.py @@ -25,10 +25,16 @@ def _score(total_value: float, role: str, cluster_size: int) -> float: return base * math.log(total_value) * cluster_mult -def process_filing(filing: dict) -> Optional[dict]: +def process_filing(filing: dict, as_of_date: Optional[str] = None) -> Optional[dict]: if filing.get("flag") != "A": return None + # Only open market purchases (P) or non-coded buys; exclude option exercises (X), grants (A), etc. 
+ tx_code = filing.get("tx_code", "") + if tx_code and tx_code not in ("P", ""): + logger.debug(f"Skipping non-open-market tx_code={tx_code}: {filing['accession_number']}") + return None + if filing.get("is_10b51"): logger.debug(f"Skipping 10b5-1 filing: {filing['accession_number']}") return None @@ -39,10 +45,10 @@ def process_filing(filing: dict) -> Optional[dict]: return None ticker = filing.get("ticker", "") - if not ticker: + if not ticker or ticker.upper() in ("NONE", "N/A", "NA"): return None - cluster_info = detect_cluster(ticker) + cluster_info = detect_cluster(ticker, as_of_date=as_of_date) cluster_size = cluster_info["cluster_size"] total_cluster_value = cluster_info["total_cluster_value"]