smaug/signals/filter_engine.py
Dominik Roth 727ad7cd6d feat(signals): as-of-date aware cluster detection, open-market-only filter
- cluster_detector: pass as_of_date through to DB query so historical signal
  reprocessing doesn't look into the future
- filter_engine: accept as_of_date; skip non-open-market tx_codes (only P/"");
  reject placeholder tickers (NONE, N/A); propagate as_of_date to cluster detection

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-26 17:48:59 +02:00

74 lines
2.3 KiB
Python

import math
import logging
from typing import Optional
import config
from signals.cluster_detector import detect_cluster
from db.db import insert_signal
logger = logging.getLogger(__name__)
def _role_weight(role: str) -> float:
    """Look up the scoring weight for a role title.

    The title is lower-cased (a falsy ``role`` is treated as an empty
    string) and compared against the keys of ``config.ROLE_WEIGHTS`` in
    that mapping's iteration order. The weight of the first key that
    occurs as a substring of the title is returned; if no key matches,
    ``config.DEFAULT_ROLE_WEIGHT`` is returned instead.
    """
    title = role.lower() if role else ""
    for fragment, fragment_weight in config.ROLE_WEIGHTS.items():
        if fragment not in title:
            continue
        # First matching key wins; later keys are never consulted.
        return fragment_weight
    return config.DEFAULT_ROLE_WEIGHT
def _score(total_value: float, role: str, cluster_size: int) -> float:
if not total_value or total_value <= 0:
return 0.0
base = _role_weight(role)
cluster_mult = 1.0 + 0.5 * (cluster_size - 1)
return base * math.log(total_value) * cluster_mult
def process_filing(filing: dict, as_of_date: Optional[str] = None) -> Optional[dict]:
    """Evaluate one filing and, if it passes every filter, store a signal.

    The filing is rejected (``None`` is returned) as soon as one of the
    following checks fails, in this order:

    1. ``filing["flag"]`` is not ``"A"``.
    2. ``filing["tx_code"]`` is set and is not ``"P"`` (only open-market
       purchases or non-coded transactions are kept).
    3. ``filing["is_10b51"]`` is truthy.
    4. ``filing["total_value"]`` (missing/falsy counts as 0) is below
       ``config.MIN_TRANSACTION_VALUE``.
    5. ``filing["ticker"]`` is missing, empty, or one of the placeholder
       values ``NONE`` / ``N/A`` / ``NA`` (case-insensitive).
    6. The cluster size reported by ``detect_cluster`` is below
       ``config.MIN_CLUSTER_SIZE``.

    Args:
        filing: Mapping describing a single filing. All fields are read
            with ``.get()``, so absent keys are tolerated.
        as_of_date: Forwarded unchanged to ``detect_cluster`` as its
            ``as_of_date`` keyword argument.

    Returns:
        The signal dict (including the ``"id"`` returned by
        ``insert_signal``) when the filing passes all filters, otherwise
        ``None``.
    """
    if filing.get("flag") != "A":
        return None

    # Only used in log messages. Read with .get() so that a filing lacking
    # an accession number is skipped like any other rejected filing instead
    # of raising KeyError from inside a debug log statement.
    accession = filing.get("accession_number")

    # Only open market purchases (P) or non-coded buys; exclude option
    # exercises (X), grants (A), etc.
    tx_code = filing.get("tx_code", "")
    if tx_code and tx_code != "P":
        logger.debug(f"Skipping non-open-market tx_code={tx_code}: {accession}")
        return None

    if filing.get("is_10b51"):
        logger.debug(f"Skipping 10b5-1 filing: {accession}")
        return None

    total_value = filing.get("total_value") or 0
    if total_value < config.MIN_TRANSACTION_VALUE:
        logger.debug(f"Below min value: {accession} (${total_value:,.0f})")
        return None

    # Reject filings whose ticker is absent or a known placeholder.
    ticker = filing.get("ticker", "")
    if not ticker or ticker.upper() in ("NONE", "N/A", "NA"):
        return None

    cluster_info = detect_cluster(ticker, as_of_date=as_of_date)
    cluster_size = cluster_info["cluster_size"]
    total_cluster_value = cluster_info["total_cluster_value"]
    if cluster_size < config.MIN_CLUSTER_SIZE:
        return None

    score = _score(total_value, filing.get("role", ""), cluster_size)
    signal = {
        "ticker": ticker,
        "trigger_date": filing.get("transaction_date", ""),
        "cluster_size": cluster_size,
        "total_cluster_value": total_cluster_value,
        "score": round(score, 2),
        "filing": filing,
        "cluster_buys": cluster_info["buys"],
    }

    # insert_signal's return value is kept on the signal under "id".
    signal_id = insert_signal(signal)
    signal["id"] = signal_id
    logger.info(f"Signal generated: {ticker} score={score:.2f} cluster={cluster_size}")
    return signal