feat(backtest): portfolio simulator with configurable strategy and transaction costs

Event-driven simulation: 1-day buy delay, N-day hold, position-size % of cash.
Models entry cost (spread + slippage + commission) and exit cost (spread + commission)
so round-trip is fully parameterised from the CLI.

Reports: annualized return, SPY benchmark, excess return, max drawdown, Sharpe,
per-trade win rate and avg net return.

CLI: python main.py simulate [--holding-days 7] [--spread 0.003] [--slippage 0.002] ...
Also runnable directly: python backtest/simulate.py --help

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Dominik Moritz Roth 2026-05-26 17:49:14 +02:00
parent fb86443987
commit 1467033aa2

405
backtest/simulate.py Normal file
View File

@ -0,0 +1,405 @@
"""
Portfolio simulator for the insider-copytrade strategy.
Usage:
python backtest/simulate.py [options]
Strategy params:
--holding-days Calendar days to hold each position (default: 7)
--buy-delay Days after signal trigger to enter (default: 1)
--position-size Fraction of available cash per trade (default: 0.10)
--min-score Minimum signal score filter (default: 0.0)
--min-cluster Minimum cluster size filter (default: 1)
--capital Initial capital in USD (default: 100000)
Transaction cost params:
--spread One-way bid-ask half-spread, e.g. 0.003 = 0.3% (default: 0.003)
--slippage Entry slippage / market impact (default: 0.002)
--commission Per-trade commission as a fraction of notional (default: 0.001)
Round-trip cost = spread*2 + slippage + commission*2 (applied at buy and sell)
"""
import argparse
import logging
import math
import os
import sys
from collections import defaultdict
from datetime import datetime, timedelta
# Allow running as script from repo root
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
import config
from db.db import get_signals_for_backtest
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Price loading
# ---------------------------------------------------------------------------
def _load_all_prices() -> dict[str, dict[str, float]]:
    """Load the entire ``price_cache`` table into memory.

    Returns:
        A plain ``{ticker: {date: close}}`` dict built from every row of
        ``price_cache`` in the SQLite database at ``config.DB_PATH``. If the
        table holds several rows for the same (ticker, date), the last row
        returned by the query wins.
    """
    from sqlalchemy import create_engine, text

    engine = create_engine(
        f"sqlite:///{config.DB_PATH}",
        connect_args={"check_same_thread": False},
    )
    try:
        with engine.connect() as conn:
            rows = conn.execute(
                text("SELECT ticker, date, close FROM price_cache")
            ).fetchall()
    finally:
        # The engine is private to this call, so release its connection pool
        # instead of leaving it to the garbage collector.
        engine.dispose()

    prices: dict[str, dict[str, float]] = defaultdict(dict)
    for ticker, date, close in rows:
        prices[ticker][date] = close

    logger.info(
        "Loaded prices for %d tickers (%d rows)",
        len(prices),
        sum(len(by_date) for by_date in prices.values()),
    )
    # Hand back a plain dict so a lookup of an unknown ticker does not
    # silently create an empty entry.
    return dict(prices)
def _closest_price_on_or_after(prices: dict[str, float], date_str: str) -> float | None:
for d in sorted(prices):
if d >= date_str:
return prices[d]
return None
def _closest_price_on_or_before(prices: dict[str, float], date_str: str) -> float | None:
result = None
for d in sorted(prices):
if d <= date_str:
result = prices[d]
else:
break
return result
# ---------------------------------------------------------------------------
# Core simulation
# ---------------------------------------------------------------------------
class Strategy:
    """Parameters of one simulation run plus the cost figures derived from them."""

    def __init__(
        self,
        holding_days: int = 7,
        buy_delay: int = 1,
        position_size: float = 0.10,
        min_score: float = 0.0,
        min_cluster: int = 1,
        capital: float = 100_000.0,
        spread: float = 0.003,
        slippage: float = 0.002,
        commission: float = 0.001,
    ):
        # Timing and sizing of each trade.
        self.holding_days, self.buy_delay = holding_days, buy_delay
        self.position_size, self.capital = position_size, capital
        # Signal filters.
        self.min_score, self.min_cluster = min_score, min_cluster
        # Per-side transaction cost components, all expressed as fractions.
        self.spread, self.slippage, self.commission = spread, slippage, commission

    @property
    def entry_cost(self) -> float:
        """Cost fraction charged when opening: spread, then slippage, then commission."""
        total = self.spread + self.slippage
        total += self.commission
        return total

    @property
    def exit_cost(self) -> float:
        """Cost fraction charged when closing: spread plus commission."""
        total = self.spread
        total += self.commission
        return total

    @property
    def roundtrip_cost(self) -> float:
        """Combined cost fraction of one entry followed by one exit."""
        opening, closing = self.entry_cost, self.exit_cost
        return opening + closing
def _annualize(total_return: float, years: float) -> float:
    """Convert a cumulative return earned over ``years`` into a compound annual rate."""
    growth = 1 + total_return
    if growth <= 0:
        # A non-positive growth factor raised to a fractional power would be
        # complex; report it as a total loss instead.
        return -1.0
    try:
        return growth ** (1 / years) - 1
    except OverflowError:
        # Very short periods make the exponent huge.
        return float("inf")


def _settle_position(pos: dict, exit_price: float, exit_cost: float) -> tuple[float, dict]:
    """Return ``(cash proceeds, trade-log entry)`` for closing ``pos`` at ``exit_price``."""
    cost_basis = pos["cost_basis"]
    notional = pos["notional"]
    gross_return = (exit_price - cost_basis) / cost_basis
    net_return = gross_return - exit_cost
    exit_proceeds = notional * (1 + net_return)
    entry = {
        "ticker": pos["ticker"],
        "entry_date": pos["entry_date"],
        "exit_date": pos["exit_date"],
        "gross_return": round(gross_return, 5),
        "net_return": round(net_return, 5),
        "pnl": round(exit_proceeds - notional, 2),
        "notional": round(notional, 2),
    }
    return exit_proceeds, entry


def simulate(strategy: Strategy) -> dict:
    """Run the portfolio simulation described by ``strategy``.

    Signals come from ``get_signals_for_backtest`` and prices from
    ``_load_all_prices``. Each usable signal is entered ``buy_delay`` calendar
    days after its trigger date and scheduled to exit ``holding_days`` calendar
    days later. The simulation only steps through dates on which at least one
    entry is scheduled; positions that are due are settled at the start of the
    next such date (priced on or before their exit date), and whatever is still
    open after the last date is settled at the end.

    Args:
        strategy: Holding period, delay, sizing, filters, capital and costs.

    Returns:
        ``{"error": ...}`` when no signal survives filtering, otherwise a dict
        with the sections ``strategy``, ``period``, ``performance`` and
        ``trades``.
    """
    signals = get_signals_for_backtest(strategy.min_score, strategy.min_cluster)

    # Keep signals whose trigger date is a real calendar date from 2020 onward
    # and schedule their trades: {entry_date: [(ticker, exit_date, signal)]}.
    # Parsing here (rather than only checking the year) stops dates such as
    # "2021-02-30" from crashing the run later.
    valid = []
    trades_by_entry: dict[str, list] = defaultdict(list)
    malformed = 0
    for s in signals:
        try:
            date_str = s["trigger_date"][:10]
            trigger_dt = datetime.strptime(date_str, "%Y-%m-%d")
            sig = dict(s, trigger_date=date_str)
        except (KeyError, IndexError, TypeError, ValueError):
            malformed += 1
            continue
        if trigger_dt.year < 2020:
            continue
        valid.append(sig)
        entry_dt = trigger_dt + timedelta(days=strategy.buy_delay)
        exit_dt = entry_dt + timedelta(days=strategy.holding_days)
        trades_by_entry[entry_dt.strftime("%Y-%m-%d")].append(
            (sig["ticker"], exit_dt.strftime("%Y-%m-%d"), sig)
        )
    if malformed:
        logger.warning("Skipped %d signals with a malformed trigger_date", malformed)

    signals = valid
    if not signals:
        return {"error": "No signals after filtering"}

    prices = _load_all_prices()

    # Only dates with at least one scheduled entry are simulated.
    all_dates = sorted(trades_by_entry)

    # State
    cash = strategy.capital
    # Each open position is a dict with keys:
    # entry_date, exit_date, ticker, cost_basis, shares, notional
    open_positions: list[dict] = []
    equity_curve: list[tuple[str, float]] = []  # (date, portfolio_value)
    trade_log: list[dict] = []
    trades_executed = 0
    trades_skipped_no_price = 0
    spy_prices = prices.get("SPY", {})

    for date_str in all_dates:
        # 1. Close any positions whose exit date has been reached.
        still_open = []
        for pos in open_positions:
            if pos["exit_date"] > date_str:
                still_open.append(pos)
                continue
            px = prices.get(pos["ticker"], {})
            exit_price = _closest_price_on_or_before(px, pos["exit_date"])
            if exit_price is None:
                exit_price = _closest_price_on_or_before(px, date_str)
            if exit_price is None:
                # No exit price at all: hand the notional back, no gain/loss.
                cash += pos["notional"]
                trade_log.append({
                    "ticker": pos["ticker"],
                    "entry_date": pos["entry_date"],
                    "exit_date": pos["exit_date"],
                    "gross_return": 0.0,
                    "net_return": 0.0,
                    "pnl": 0.0,
                    "note": "no_exit_price",
                })
                continue
            proceeds, entry = _settle_position(pos, exit_price, strategy.exit_cost)
            cash += proceeds
            trade_log.append(entry)
        open_positions = still_open

        # 2. Open new positions for today's signals.
        for ticker, exit_date_str, _sig in trades_by_entry[date_str]:
            px = prices.get(ticker, {})
            entry_price = _closest_price_on_or_after(px, date_str)
            # A missing or non-positive price cannot be used as a cost basis.
            if entry_price is None or entry_price <= 0:
                trades_skipped_no_price += 1
                continue
            notional = cash * strategy.position_size
            if notional < 1.0:
                continue
            # Entry costs are modelled as a higher effective purchase price.
            effective_entry = entry_price * (1 + strategy.entry_cost)
            shares = notional / effective_entry
            cash -= notional
            open_positions.append({
                "entry_date": date_str,
                "exit_date": exit_date_str,
                "ticker": ticker,
                "cost_basis": effective_entry,
                "shares": shares,
                "notional": notional,
            })
            trades_executed += 1

        # Track equity: cash plus open positions carried at invested notional.
        open_value = sum(pos["notional"] for pos in open_positions)
        equity_curve.append((date_str, cash + open_value))

    # Settle everything still open after the last simulated date. When no price
    # is found the position is closed flat at its cost basis (exit cost still
    # applies). These trades are logged so the per-trade statistics cover them.
    for pos in open_positions:
        px = prices.get(pos["ticker"], {})
        exit_price = _closest_price_on_or_before(px, pos["exit_date"]) or pos["cost_basis"]
        proceeds, entry = _settle_position(pos, exit_price, strategy.exit_cost)
        cash += proceeds
        trade_log.append(entry)
    final_value = cash

    # SPY benchmark over the same date span as the equity curve.
    if equity_curve and spy_prices:
        start_str = equity_curve[0][0]
        end_str = equity_curve[-1][0]
        spy_start = _closest_price_on_or_after(spy_prices, start_str)
        spy_end = _closest_price_on_or_before(spy_prices, end_str)
        spy_total = (spy_end - spy_start) / spy_start if (spy_start and spy_end) else 0.0
    else:
        spy_total = 0.0

    # Annualized metrics; the span is floored so a single-date run cannot
    # divide by zero.
    if equity_curve:
        start_dt = datetime.strptime(equity_curve[0][0], "%Y-%m-%d")
        end_dt = datetime.strptime(equity_curve[-1][0], "%Y-%m-%d")
        years = max((end_dt - start_dt).days / 365.25, 0.001)
    else:
        years = 1.0
    total_return = (final_value - strategy.capital) / strategy.capital
    ann_return = _annualize(total_return, years)
    spy_ann = _annualize(spy_total, years)

    # Max drawdown from the equity curve, measured against the running peak.
    peak = strategy.capital
    max_dd = 0.0
    for _, val in equity_curve:
        if val > peak:
            peak = val
        dd = (peak - val) / peak
        if dd > max_dd:
            max_dd = dd

    # Per-trade Sharpe, win rate and average return from the trade log.
    net_returns = [t["net_return"] for t in trade_log if "net_return" in t]
    if net_returns:
        avg_r = sum(net_returns) / len(net_returns)
        std_r = math.sqrt(sum((r - avg_r) ** 2 for r in net_returns) / len(net_returns))
        trades_per_year = trades_executed / years
        sharpe = (avg_r / std_r * math.sqrt(trades_per_year)) if std_r > 0 else 0.0
        win_rate = sum(1 for r in net_returns if r > 0) / len(net_returns)
        avg_net_return = avg_r
    else:
        sharpe = win_rate = avg_net_return = 0.0

    return {
        "strategy": {
            "holding_days": strategy.holding_days,
            "buy_delay": strategy.buy_delay,
            "position_size": strategy.position_size,
            "min_score": strategy.min_score,
            "min_cluster": strategy.min_cluster,
            "roundtrip_cost_pct": round(strategy.roundtrip_cost * 100, 3),
        },
        "period": {
            "start": equity_curve[0][0] if equity_curve else "n/a",
            "end": equity_curve[-1][0] if equity_curve else "n/a",
            "years": round(years, 2),
        },
        "performance": {
            "initial_capital": strategy.capital,
            "final_value": round(final_value, 2),
            "total_return_pct": round(total_return * 100, 2),
            "annualized_return_pct": round(ann_return * 100, 2),
            "spy_annualized_pct": round(spy_ann * 100, 2),
            "excess_return_pct": round((ann_return - spy_ann) * 100, 2),
            "max_drawdown_pct": round(max_dd * 100, 2),
            "sharpe": round(sharpe, 3),
        },
        "trades": {
            "signals_total": len(signals),
            "executed": trades_executed,
            "skipped_no_price": trades_skipped_no_price,
            "win_rate_pct": round(win_rate * 100, 2),
            "avg_net_return_pct": round(avg_net_return * 100, 3),
        },
    }
# ---------------------------------------------------------------------------
# CLI
# ---------------------------------------------------------------------------
def _print_results(r: dict):
if "error" in r:
print(f"Error: {r['error']}")
return
s = r["strategy"]
p = r["performance"]
t = r["trades"]
period = r["period"]
w = 48
print(f"\n{'=' * w}")
print(f" Portfolio Simulation Results")
print(f"{'=' * w}")
print(f" Strategy")
print(f" Hold: {s['holding_days']}d | Delay: {s['buy_delay']}d | Size: {s['position_size']*100:.0f}% of cash")
print(f" Score ≥ {s['min_score']} | Cluster ≥ {s['min_cluster']}")
print(f" Round-trip cost: {s['roundtrip_cost_pct']:.2f}%")
print(f" Period: {period['start']}{period['end']} ({period['years']}y)")
print(f"{'' * w}")
print(f" Capital: ${p['initial_capital']:>12,.0f} → ${p['final_value']:>12,.2f}")
print(f" Total ret: {p['total_return_pct']:>+8.1f}%")
print(f" Ann. ret: {p['annualized_return_pct']:>+8.1f}% (SPY: {p['spy_annualized_pct']:+.1f}%)")
print(f" Excess: {p['excess_return_pct']:>+8.1f}%")
print(f" Max DD: {p['max_drawdown_pct']:>8.1f}%")
print(f" Sharpe: {p['sharpe']:>8.3f}")
print(f"{'' * w}")
print(f" Trades executed: {t['executed']:>6} / {t['signals_total']} signals")
print(f" Skipped (no px): {t['skipped_no_price']:>6}")
print(f" Win rate: {t['win_rate_pct']:>5.1f}%")
print(f" Avg net return: {t['avg_net_return_pct']:>+6.3f}% per trade")
print(f"{'=' * w}\n")
def main():
    """Command-line entry point: parse options, run the simulation, print the report."""
    logging.basicConfig(
        format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
        level=logging.INFO,
    )

    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description="Simulate insider-copytrade portfolio with realistic costs",
    )
    # One row per option: (flag, value type, default, help text or None).
    option_table = (
        # Strategy
        ("--holding-days", int, 7, None),
        ("--buy-delay", int, 1, None),
        ("--position-size", float, 0.10,
         "Fraction of available cash per trade (0.10 = 10%%)"),
        ("--min-score", float, 0.0, None),
        ("--min-cluster", int, 1, None),
        ("--capital", float, 100_000.0, None),
        # Costs
        ("--spread", float, 0.003,
         "Half bid-ask spread paid on entry AND exit (0.003 = 0.3%%)"),
        ("--slippage", float, 0.002,
         "Entry slippage / market impact (0.002 = 0.2%%)"),
        ("--commission", float, 0.001,
         "Per-trade commission as fraction of notional"),
    )
    for flag, value_type, default, help_text in option_table:
        parser.add_argument(flag, type=value_type, default=default, help=help_text)
    args = parser.parse_args()

    # init_db is imported and called only after the arguments parsed cleanly.
    from db.db import init_db
    init_db()

    # Every parsed option maps one-to-one onto a Strategy constructor argument.
    strategy_fields = (
        "holding_days", "buy_delay", "position_size",
        "min_score", "min_cluster", "capital",
        "spread", "slippage", "commission",
    )
    strategy = Strategy(**{name: getattr(args, name) for name in strategy_fields})

    _print_results(simulate(strategy))


if __name__ == "__main__":
    main()