From 1467033aa296722c61b76f0b27ab07b2a32d13d4 Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Tue, 26 May 2026 17:49:14 +0200 Subject: [PATCH] feat(backtest): portfolio simulator with configurable strategy and transaction costs Event-driven simulation: 1-day buy delay, N-day hold, position-size % of cash. Models entry cost (spread + slippage + commission) and exit cost (spread + commission) so round-trip is fully parameterised from the CLI. Reports: annualized return, SPY benchmark, excess return, max drawdown, Sharpe, per-trade win rate and avg net return. CLI: python main.py simulate [--holding-days 7] [--spread 0.003] [--slippage 0.002] ... Also runnable directly: python backtest/simulate.py --help Co-Authored-By: Claude Sonnet 4.6 --- backtest/simulate.py | 405 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 405 insertions(+) create mode 100644 backtest/simulate.py diff --git a/backtest/simulate.py b/backtest/simulate.py new file mode 100644 index 0000000..d2d737c --- /dev/null +++ b/backtest/simulate.py @@ -0,0 +1,405 @@ +""" +Portfolio simulator for the insider-copytrade strategy. + +Usage: + python backtest/simulate.py [options] + +Strategy params: + --holding-days Calendar days to hold each position (default: 7) + --buy-delay Days after signal trigger to enter (default: 1) + --position-size Fraction of available cash per trade (default: 0.10) + --min-score Minimum signal score filter (default: 0.0) + --min-cluster Minimum cluster size filter (default: 1) + --capital Initial capital in USD (default: 100000) + +Transaction cost params: + --spread One-way bid-ask half-spread, e.g. 
0.003 = 0.3% (default: 0.003) + --slippage Entry slippage / market impact (default: 0.002) + --commission Flat per-trade commission as fraction of notional (default: 0.001) + +Round-trip cost = spread*2 + slippage + commission*2 (applied at buy and sell) +""" + +import argparse +import logging +import math +import os +import sys +from collections import defaultdict +from datetime import datetime, timedelta + +# Allow running as script from repo root +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) + +import config +from db.db import get_signals_for_backtest + +logger = logging.getLogger(__name__) + + +# --------------------------------------------------------------------------- +# Price loading +# --------------------------------------------------------------------------- + +def _load_all_prices() -> dict[str, dict[str, float]]: + """Load entire price cache from DB into memory: {ticker: {date: close}}.""" + from sqlalchemy import create_engine, text + + engine = create_engine( + f"sqlite:///{config.DB_PATH}", + connect_args={"check_same_thread": False}, + ) + with engine.connect() as conn: + rows = conn.execute(text("SELECT ticker, date, close FROM price_cache")).fetchall() + + prices: dict[str, dict[str, float]] = defaultdict(dict) + for ticker, date, close in rows: + prices[ticker][date] = close + logger.info(f"Loaded prices for {len(prices)} tickers ({sum(len(v) for v in prices.values())} rows)") + return dict(prices) + + +def _closest_price_on_or_after(prices: dict[str, float], date_str: str) -> float | None: + for d in sorted(prices): + if d >= date_str: + return prices[d] + return None + + +def _closest_price_on_or_before(prices: dict[str, float], date_str: str) -> float | None: + result = None + for d in sorted(prices): + if d <= date_str: + result = prices[d] + else: + break + return result + + +# --------------------------------------------------------------------------- +# Core simulation +# 
# ---------------------------------------------------------------------------
# Core simulation
# ---------------------------------------------------------------------------

class Strategy:
    """Strategy and transaction-cost parameters for one simulation run.

    Attributes mirror the constructor arguments:
        holding_days:  calendar days each position is held.
        buy_delay:     calendar days between signal trigger and entry.
        position_size: fraction of available cash committed per trade.
        min_score:     minimum signal score passed to the signal query.
        min_cluster:   minimum cluster size passed to the signal query.
        capital:       initial capital.
        spread:        one-way spread cost, charged at entry and at exit.
        slippage:      entry-only slippage / market impact.
        commission:    commission as a fraction of notional, charged at
                       entry and at exit.
    """

    def __init__(
        self,
        holding_days: int = 7,
        buy_delay: int = 1,
        position_size: float = 0.10,
        min_score: float = 0.0,
        min_cluster: int = 1,
        capital: float = 100_000.0,
        spread: float = 0.003,
        slippage: float = 0.002,
        commission: float = 0.001,
    ):
        self.holding_days = holding_days
        self.buy_delay = buy_delay
        self.position_size = position_size
        self.min_score = min_score
        self.min_cluster = min_cluster
        self.capital = capital
        self.spread = spread
        self.slippage = slippage
        self.commission = commission

    @property
    def entry_cost(self) -> float:
        """Fractional cost applied at entry: spread + slippage + commission."""
        return self.spread + self.slippage + self.commission

    @property
    def exit_cost(self) -> float:
        """Fractional cost applied at exit: spread + commission."""
        return self.spread + self.commission

    @property
    def roundtrip_cost(self) -> float:
        """Total fractional cost of one entry plus one exit."""
        return self.entry_cost + self.exit_cost


def _filter_valid_signals(signals, min_year: int = 2020) -> list[dict]:
    """Keep signals whose trigger date parses as YYYY-MM-DD and is recent enough.

    Each kept signal is returned as a new dict whose ``trigger_date`` is
    truncated to its first 10 characters; the inputs are not mutated.
    Signals with a missing, non-string or unparseable date, or a year before
    ``min_year``, are dropped.
    """
    valid = []
    for sig in signals:
        try:
            date_str = sig["trigger_date"][:10]
            # Full parse (not just the year) so later strptime calls cannot fail.
            trigger_dt = datetime.strptime(date_str, "%Y-%m-%d")
            cleaned = dict(sig, trigger_date=date_str)
        except (KeyError, IndexError, TypeError, ValueError):
            continue
        if trigger_dt.year >= min_year:
            valid.append(cleaned)
    return valid


def _settle_position(position: dict, exit_price, exit_cost: float) -> tuple[float, dict]:
    """Close one position and return ``(cash_returned, trade_log_record)``.

    ``position`` carries ``ticker``, ``entry_date``, ``exit_date``,
    ``cost_basis`` (entry price including entry costs) and ``notional``.
    When ``exit_price`` is None the notional is handed back unchanged and the
    record is flagged with ``note="no_exit_price"``.
    """
    notional = position["notional"]
    record = {
        "ticker": position["ticker"],
        "entry_date": position["entry_date"],
        "exit_date": position["exit_date"],
    }
    if exit_price is None:
        record.update(gross_return=0.0, net_return=0.0, pnl=0.0, note="no_exit_price")
        return notional, record

    cost_basis = position["cost_basis"]
    gross_return = (exit_price - cost_basis) / cost_basis
    net_return = gross_return - exit_cost
    proceeds = notional * (1 + net_return)
    record.update(
        gross_return=round(gross_return, 5),
        net_return=round(net_return, 5),
        pnl=round(proceeds - notional, 2),
        notional=round(notional, 2),
    )
    return proceeds, record


def _annualize(total_return: float, years: float) -> float:
    """Convert a total return earned over ``years`` into a compound annual rate.

    Returns -1.0 when the capital was wiped out (a fractional power of a
    non-positive base would be complex) and ``inf`` when the result overflows
    a float, which can happen for very short periods.
    """
    base = 1 + total_return
    if base <= 0:
        return -1.0
    try:
        return base ** (1 / years) - 1
    except OverflowError:
        return math.inf


def _max_drawdown(equity_curve, start_value: float) -> float:
    """Return the largest peak-to-trough decline of the curve as a fraction."""
    peak = start_value
    worst = 0.0
    for _, value in equity_curve:
        peak = max(peak, value)
        worst = max(worst, (peak - value) / peak)
    return worst


def _run_simulation(strategy: Strategy, signals: list[dict], prices: dict) -> dict:
    """Run the event loop over pre-validated signals and build the report dict.

    Events are processed on entry dates only: on each one, positions whose
    exit date has passed are closed first (freeing cash), then the day's new
    positions are opened with ``position_size`` of the remaining cash.
    """
    # Planned trades grouped by entry date: {entry_date: [(ticker, exit_date)]}
    trades_by_entry: dict[str, list] = defaultdict(list)
    for sig in signals:
        trigger_dt = datetime.strptime(sig["trigger_date"], "%Y-%m-%d")
        entry_dt = trigger_dt + timedelta(days=strategy.buy_delay)
        exit_dt = entry_dt + timedelta(days=strategy.holding_days)
        trades_by_entry[entry_dt.strftime("%Y-%m-%d")].append(
            (sig["ticker"], exit_dt.strftime("%Y-%m-%d"))
        )

    cash = strategy.capital
    open_positions: list[dict] = []
    equity_curve: list[tuple[str, float]] = []  # (date, portfolio_value)
    trade_log: list[dict] = []
    trades_executed = 0
    trades_skipped_no_price = 0

    for date_str in sorted(trades_by_entry):
        # 1. Close any positions whose exit date <= today
        still_open = []
        for pos in open_positions:
            if pos["exit_date"] > date_str:
                still_open.append(pos)
                continue
            px = prices.get(pos["ticker"], {})
            exit_price = _closest_price_on_or_before(px, pos["exit_date"])
            if exit_price is None:
                exit_price = _closest_price_on_or_before(px, date_str)
            proceeds, record = _settle_position(pos, exit_price, strategy.exit_cost)
            cash += proceeds
            trade_log.append(record)
        open_positions = still_open

        # 2. Open new positions for today's signals
        for ticker, exit_date_str in trades_by_entry[date_str]:
            entry_price = _closest_price_on_or_after(prices.get(ticker, {}), date_str)
            # A non-positive price cannot serve as a cost basis (division by zero).
            if entry_price is None or entry_price <= 0:
                trades_skipped_no_price += 1
                continue

            notional = cash * strategy.position_size
            if notional < 1.0:
                continue

            cash -= notional
            open_positions.append({
                "ticker": ticker,
                "entry_date": date_str,
                "exit_date": exit_date_str,
                # Entry costs are modelled as a higher effective entry price.
                "cost_basis": entry_price * (1 + strategy.entry_cost),
                "notional": notional,
            })
            trades_executed += 1

        # Open positions are carried at cost (conservative), not marked to market.
        open_value = sum(pos["notional"] for pos in open_positions)
        equity_curve.append((date_str, cash + open_value))

    # Liquidate whatever is still open; log these trades and record the
    # resulting equity so the statistics and drawdown include them.
    if open_positions:
        for pos in open_positions:
            px = prices.get(pos["ticker"], {})
            exit_price = _closest_price_on_or_before(px, pos["exit_date"])
            proceeds, record = _settle_position(pos, exit_price, strategy.exit_cost)
            cash += proceeds
            trade_log.append(record)
        last_exit = max(pos["exit_date"] for pos in open_positions)
        equity_curve.append((max(last_exit, equity_curve[-1][0]), cash))

    final_value = cash

    # SPY benchmark over the same period
    spy_prices = prices.get("SPY", {})
    spy_total = 0.0
    if equity_curve and spy_prices:
        spy_start = _closest_price_on_or_after(spy_prices, equity_curve[0][0])
        spy_end = _closest_price_on_or_before(spy_prices, equity_curve[-1][0])
        if spy_start and spy_end:
            spy_total = (spy_end - spy_start) / spy_start

    # Annualized metrics
    if equity_curve:
        start_dt = datetime.strptime(equity_curve[0][0], "%Y-%m-%d")
        end_dt = datetime.strptime(equity_curve[-1][0], "%Y-%m-%d")
        years = max((end_dt - start_dt).days / 365.25, 0.001)
    else:
        years = 1.0

    total_return = (final_value - strategy.capital) / strategy.capital
    ann_return = _annualize(total_return, years)
    spy_ann = _annualize(spy_total, years)
    max_dd = _max_drawdown(equity_curve, strategy.capital)

    # Per-trade Sharpe from trade log (population std, scaled by trades/year)
    net_returns = [t["net_return"] for t in trade_log if "net_return" in t]
    if net_returns:
        avg_r = sum(net_returns) / len(net_returns)
        std_r = math.sqrt(sum((r - avg_r) ** 2 for r in net_returns) / len(net_returns))
        trades_per_year = trades_executed / years
        sharpe = (avg_r / std_r * math.sqrt(trades_per_year)) if std_r > 0 else 0.0
        win_rate = sum(1 for r in net_returns if r > 0) / len(net_returns)
        avg_net_return = avg_r
    else:
        sharpe = win_rate = avg_net_return = 0.0

    return {
        "strategy": {
            "holding_days": strategy.holding_days,
            "buy_delay": strategy.buy_delay,
            "position_size": strategy.position_size,
            "min_score": strategy.min_score,
            "min_cluster": strategy.min_cluster,
            "roundtrip_cost_pct": round(strategy.roundtrip_cost * 100, 3),
        },
        "period": {
            "start": equity_curve[0][0] if equity_curve else "n/a",
            "end": equity_curve[-1][0] if equity_curve else "n/a",
            "years": round(years, 2),
        },
        "performance": {
            "initial_capital": strategy.capital,
            "final_value": round(final_value, 2),
            "total_return_pct": round(total_return * 100, 2),
            "annualized_return_pct": round(ann_return * 100, 2),
            "spy_annualized_pct": round(spy_ann * 100, 2),
            "excess_return_pct": round((ann_return - spy_ann) * 100, 2),
            "max_drawdown_pct": round(max_dd * 100, 2),
            "sharpe": round(sharpe, 3),
        },
        "trades": {
            "signals_total": len(signals),
            "executed": trades_executed,
            "skipped_no_price": trades_skipped_no_price,
            "win_rate_pct": round(win_rate * 100, 2),
            "avg_net_return_pct": round(avg_net_return * 100, 3),
        },
    }


def simulate(strategy: Strategy) -> dict:
    """Simulate the portfolio for ``strategy`` and return a results dict.

    Signals come from ``get_signals_for_backtest`` and prices from the price
    cache. Returns ``{"error": ...}`` when no usable signal remains after
    date filtering; otherwise a dict with ``strategy``, ``period``,
    ``performance`` and ``trades`` sections.
    """
    raw_signals = get_signals_for_backtest(strategy.min_score, strategy.min_cluster)
    signals = _filter_valid_signals(raw_signals)
    if not signals:
        return {"error": "No signals after filtering"}

    return _run_simulation(strategy, signals, _load_all_prices())


# ---------------------------------------------------------------------------
# CLI
# ---------------------------------------------------------------------------

def _print_results(r: dict):
    """Pretty-print a result dict produced by ``simulate`` to stdout."""
    if "error" in r:
        print(f"Error: {r['error']}")
        return

    s = r["strategy"]
    p = r["performance"]
    t = r["trades"]
    period = r["period"]

    w = 48
    print(f"\n{'=' * w}")
    print(" Portfolio Simulation Results")
    print(f"{'=' * w}")
    print(" Strategy")
    print(f" Hold: {s['holding_days']}d | Delay: {s['buy_delay']}d | Size: {s['position_size']*100:.0f}% of cash")
    print(f" Score ≥ {s['min_score']} | Cluster ≥ {s['min_cluster']}")
    print(f" Round-trip cost: {s['roundtrip_cost_pct']:.2f}%")
    print(f" Period: {period['start']} → {period['end']} ({period['years']}y)")
    print(f"{'─' * w}")
    print(f" Capital: ${p['initial_capital']:>12,.0f} → ${p['final_value']:>12,.2f}")
    print(f" Total ret: {p['total_return_pct']:>+8.1f}%")
    print(f" Ann. ret: {p['annualized_return_pct']:>+8.1f}% (SPY: {p['spy_annualized_pct']:+.1f}%)")
    print(f" Excess: {p['excess_return_pct']:>+8.1f}%")
    print(f" Max DD: {p['max_drawdown_pct']:>8.1f}%")
    print(f" Sharpe: {p['sharpe']:>8.3f}")
    print(f"{'─' * w}")
    print(f" Trades executed: {t['executed']:>6} / {t['signals_total']} signals")
    print(f" Skipped (no px): {t['skipped_no_price']:>6}")
    print(f" Win rate: {t['win_rate_pct']:>5.1f}%")
    print(f" Avg net return: {t['avg_net_return_pct']:>+6.3f}% per trade")
    print(f"{'=' * w}\n")


def main():
    """CLI entry point: parse arguments, run the simulation, print the report."""
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
    )

    parser = argparse.ArgumentParser(
        description="Simulate insider-copytrade portfolio with realistic costs",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    # Strategy
    parser.add_argument("--holding-days", type=int, default=7)
    parser.add_argument("--buy-delay", type=int, default=1)
    parser.add_argument("--position-size", type=float, default=0.10,
                        help="Fraction of available cash per trade (0.10 = 10%%)")
    parser.add_argument("--min-score", type=float, default=0.0)
    parser.add_argument("--min-cluster", type=int, default=1)
    parser.add_argument("--capital", type=float, default=100_000.0)
    # Costs
    parser.add_argument("--spread", type=float, default=0.003,
                        help="Half bid-ask spread paid on entry AND exit (0.003 = 0.3%%)")
    parser.add_argument("--slippage", type=float, default=0.002,
                        help="Entry slippage / market impact (0.002 = 0.2%%)")
    parser.add_argument("--commission", type=float, default=0.001,
                        help="Per-trade commission as fraction of notional")

    args = parser.parse_args()

    # Imported here so the DB layer is only touched when run as a CLI.
    from db.db import init_db
    init_db()

    strategy = Strategy(
        holding_days=args.holding_days,
        buy_delay=args.buy_delay,
        position_size=args.position_size,
        min_score=args.min_score,
        min_cluster=args.min_cluster,
        capital=args.capital,
        spread=args.spread,
        slippage=args.slippage,
        commission=args.commission,
    )

    result = simulate(strategy)
    _print_results(result)


if __name__ == "__main__":
    main()