"""Extract non-derivative transaction rows from Form 4 ownership XML."""

import re
from typing import Optional

from lxml import etree

# Regex fragments that mark a footnote as describing a Rule 10b5-1 trading
# plan. They are matched against lower-cased text, so keep them lower-case.
_10B51_PATTERNS = [
    r"10b5-1",
    r"rule 10b5",
    r"adopted a plan",
    r"10b5\(1\)",
]


def _is_10b51(text: str) -> bool:
    """Return True when *text* matches any pattern in ``_10B51_PATTERNS``.

    Matching is case-insensitive because the text is lower-cased first.
    """
    text_lower = text.lower()
    return any(re.search(p, text_lower) for p in _10B51_PATTERNS)


def _text(el, tag: str) -> Optional[str]:
    """Return the stripped text of the first descendant of *el* named *tag*.

    The element's own text is preferred. When that is missing or blank, the
    text of a direct ``<value>`` child is used instead, because ownership
    XML commonly wraps field content as ``<tag><value>...</value></tag>``.

    Returns:
        The stripped text, or ``None`` when the element is absent or carries
        no non-blank text in either location.
    """
    node = el.find(".//" + tag)
    if node is None:
        return None

    text = (node.text or "").strip()
    if not text:
        # Pretty-printed wrappers have whitespace-only text; unformatted ones
        # have none at all. Either way the payload lives in <value>.
        value_node = node.find("value")
        if value_node is not None and value_node.text:
            text = value_node.text.strip()

    # Blank text is reported as None so callers see one "missing" signal.
    return text or None


def _float(el, tag: str) -> Optional[float]:
    """Return the text of *tag* under *el* parsed as a float.

    Thousands separators (``,``) are removed before parsing.

    Returns:
        The parsed number, or ``None`` when the element is missing/blank or
        its text is not a valid float.
    """
    val = _text(el, tag)
    if val is None:
        return None
    try:
        return float(val.replace(",", ""))
    except ValueError:
        return None


def parse_form4(xml_bytes: bytes, accession_number: str, filed_date: str) -> list[dict]:
    """Parse an ownership XML document into one dict per transaction.

    Only ``nonDerivativeTransaction`` elements are read. A transaction with
    no acquired/disposed code is skipped.

    Args:
        xml_bytes: Raw XML document.
        accession_number: Copied verbatim into every returned row.
        filed_date: Copied into every row, and used as the transaction date
            when the transaction has none of its own.

    Returns:
        A list of row dicts with the keys ``accession_number``, ``ticker``,
        ``cik``, ``insider_name``, ``role``, ``transaction_date``,
        ``filed_date``, ``shares``, ``price``, ``total_value``, ``flag``,
        ``is_10b51`` (0 or 1) and ``post_tx_shares``. The list is empty when
        the XML cannot be parsed or contains no usable transactions.
    """
    try:
        root = etree.fromstring(xml_bytes)
    except etree.XMLSyntaxError:
        return []

    # Document-level fields are identical for every row.
    ticker = _text(root, "issuerTradingSymbol") or ""
    cik = _text(root, "issuerCik") or ""
    insider_name = _text(root, "rptOwnerName") or ""
    # NOTE(review): when there is no officerTitle this stores the raw
    # isDirector text rather than a readable title - confirm that is intended.
    role = _text(root, "officerTitle") or _text(root, "isDirector") or ""

    # Index footnotes once. This replaces a per-reference tree search that
    # relied on element truthiness (False for any element without children,
    # so plain-text footnotes were always discarded) and on interpolating
    # the id into a path expression (which breaks on ids containing quotes).
    footnote_texts = []
    footnote_by_id = {}
    for node in root.findall(".//footnote"):
        node_text = node.text or ""
        footnote_texts.append(node_text)
        node_id = node.get("id")
        if node_id is not None:
            # Keep the first footnote for a duplicated id, as find() would.
            footnote_by_id.setdefault(node_id, node_text)

    # NOTE(review): a match in any footnote of the document marks every
    # transaction as 10b5-1, which makes the per-transaction check below
    # redundant. Kept as-is to preserve output; confirm the intent.
    global_10b51 = _is_10b51(" ".join(footnote_texts))

    results = []
    for tx in root.findall(".//nonDerivativeTransaction"):
        flag = _text(tx, "transactionAcquiredDisposedCode")
        if not flag:
            continue

        shares = _float(tx, "transactionShares")
        price = _float(tx, "transactionPricePerShare")
        total_value = _float(tx, "transactionTotalValue")
        if total_value is None and shares is not None and price is not None:
            # Derive the value when the document does not state it.
            total_value = shares * price
        post_tx_shares = _float(tx, "sharesOwnedFollowingTransaction")
        tx_date = _text(tx, "transactionDate") or filed_date

        # Text of the footnotes this transaction references by id.
        tx_footnote_text = " ".join(
            footnote_by_id.get(ref.get("id", ""), "")
            for ref in tx.findall(".//footnoteId")
        )
        is_10b51 = int(global_10b51 or _is_10b51(tx_footnote_text))

        results.append(
            {
                "accession_number": accession_number,
                "ticker": ticker.upper(),
                "cik": cik,
                "insider_name": insider_name,
                "role": role,
                "transaction_date": tx_date,
                "filed_date": filed_date,
                "shares": shares,
                "price": price,
                "total_value": total_value,
                "flag": flag.upper(),
                "is_10b51": is_10b51,
                "post_tx_shares": post_tx_shares,
            }
        )

    return results