smaug/ingestion/form4_parser.py
Claude 7e9221a914 feat: add PLAN.md and insider copytrade POC implementation
- PLAN.md: full implementation plan from issue
- config.py: configurable thresholds, API keys via .env
- ingestion/: EDGAR RSS poller + Form 4 XML parser
- db/: SQLite schema + interface (WAL mode)
- signals/: filter engine (buy/10b5-1/value/role) + cluster detector
- alerts/: Slack webhook alert with score gating
- broker/: Alpaca paper/live trade execution
- backtest/: historical signal backtesting with yfinance
- main.py: CLI entrypoint (run | fetch-once | backtest)
2026-05-04 16:15:22 +00:00

96 lines
2.7 KiB
Python

import re
from lxml import etree
from typing import Optional
# Regular-expression fragments that mark a Rule 10b5-1 trading-plan mention.
# They are matched against lower-cased text, so they are written in lower case.
_10B51_PATTERNS = [
    r"10b5-1",
    r"rule 10b5",
    r"adopted a plan",
    r"10b5\(1\)",
]


def _is_10b51(text: str) -> bool:
    """Return True if *text* matches any entry of ``_10B51_PATTERNS``.

    The text is lower-cased before matching, making the check
    case-insensitive. An empty string never matches.
    """
    haystack = text.lower()
    for pattern in _10B51_PATTERNS:
        if re.search(pattern, haystack) is not None:
            return True
    return False
def _text(el, tag: str) -> Optional[str]:
    """Return the stripped text of the first ``tag`` descendant of *el*.

    EDGAR ownership documents wrap most transaction fields in a ``<value>``
    child (``<transactionShares><value>100</value></transactionShares>``),
    while issuer/owner fields carry their text directly. Both layouts are
    supported: the element's own text is used when it is non-blank,
    otherwise the text of a direct ``<value>`` child.

    Args:
        el: Element to search below (any object with an ElementTree-style
            ``find``).
        tag: Tag name to look up at any depth under *el*.

    Returns:
        The stripped text, or ``None`` when the element is missing or holds
        no non-blank text in either location.
    """
    node = el.find(".//" + tag)
    if node is None:
        return None

    text = (node.text or "").strip()
    if not text:
        # Pretty-printed wrappers only hold indentation whitespace here; the
        # real payload lives in the nested <value> element.
        value_node = node.find("value")
        if value_node is not None and value_node.text:
            text = value_node.text.strip()

    # Normalise "blank" to None so callers get a single "no value" signal.
    return text or None
def _float(el, tag: str) -> Optional[float]:
    """Read the ``tag`` field under *el* as a float.

    Thousands separators (commas) are removed before conversion.

    Returns:
        The parsed number, or ``None`` when ``_text`` yields nothing or the
        text is not a valid float literal.
    """
    raw = _text(el, tag)
    if raw is None:
        return None

    cleaned = raw.replace(",", "")
    try:
        number = float(cleaned)
    except ValueError:
        return None
    else:
        return number
def parse_form4(xml_bytes: bytes, accession_number: str, filed_date: str) -> list[dict]:
    """Parse a Form 4 XML document into one dict per non-derivative transaction.

    Args:
        xml_bytes: Raw XML of the filing.
        accession_number: Copied unchanged into every returned row.
        filed_date: Filing date; also used as the transaction date when a
            transaction carries none.

    Returns:
        A list of dicts with the keys ``accession_number``, ``ticker``,
        ``cik``, ``insider_name``, ``role``, ``transaction_date``,
        ``filed_date``, ``shares``, ``price``, ``total_value``, ``flag``,
        ``is_10b51`` (0/1) and ``post_tx_shares``. The list is empty when the
        XML cannot be parsed. Transactions without an acquired/disposed code
        are skipped.
    """
    try:
        root = etree.fromstring(xml_bytes)
    except etree.XMLSyntaxError:
        return []

    # Filing-level fields: taken from the first matching element in the document.
    ticker = _text(root, "issuerTradingSymbol") or ""
    cik = _text(root, "issuerCik") or ""
    insider_name = _text(root, "rptOwnerName") or ""
    # NOTE(review): when no officerTitle is present this falls back to the raw
    # text of <isDirector> (a flag value, not a title) — confirm that is what
    # downstream role filtering expects.
    role = _text(root, "officerTitle") or _text(root, "isDirector") or ""

    footnote_nodes = root.findall(".//footnote")
    footnotes_text = " ".join((node.text or "") for node in footnote_nodes)
    # NOTE(review): a 10b5-1 mention in ANY footnote flags every transaction
    # in the filing, so the per-transaction lookup below can only add to it.
    global_10b51 = _is_10b51(footnotes_text)

    # Index footnote text by id once. This replaces a per-id XPath lookup that
    # used `find(...) or placeholder`: lxml elements without children are
    # falsy, so that idiom silently discarded every real footnote. A dict also
    # avoids interpolating document-supplied ids into an XPath expression.
    footnote_text_by_id: dict = {}
    for node in footnote_nodes:
        footnote_id = node.get("id")
        # Keep the first footnote per id, mirroring find()'s first-match rule.
        if footnote_id is not None and footnote_id not in footnote_text_by_id:
            footnote_text_by_id[footnote_id] = node.text or ""

    results = []
    for tx in root.findall(".//nonDerivativeTransaction"):
        flag = _text(tx, "transactionAcquiredDisposedCode")
        if not flag:
            continue

        shares = _float(tx, "transactionShares")
        price = _float(tx, "transactionPricePerShare")
        total_value = _float(tx, "transactionTotalValue")
        # Derive the total only when it is not reported and both factors are known.
        if total_value is None and shares is not None and price is not None:
            total_value = shares * price
        post_tx_shares = _float(tx, "sharesOwnedFollowingTransaction")
        tx_date = _text(tx, "transactionDate") or filed_date

        # Text of the footnotes this transaction references; unknown ids add "".
        tx_footnote_text = " ".join(
            footnote_text_by_id.get(ref.get("id", ""), "")
            for ref in tx.findall(".//footnoteId")
        )
        is_10b51 = int(global_10b51 or _is_10b51(tx_footnote_text))

        results.append(
            {
                "accession_number": accession_number,
                "ticker": ticker.upper(),
                "cik": cik,
                "insider_name": insider_name,
                "role": role,
                "transaction_date": tx_date,
                "filed_date": filed_date,
                "shares": shares,
                "price": price,
                "total_value": total_value,
                "flag": flag.upper(),
                "is_10b51": is_10b51,
                "post_tx_shares": post_tx_shares,
            }
        )
    return results