smaug/backtest/plot.py
Dominik Roth d0e98b9cb7 feat: cap-tier filtering, Alpaca cost model, README cleanup
- simulate.py: --cap-tier large|mid|small|micro; yfinance market cap fetch
  with DB cache (ticker_meta table); argv fix for main.py dispatch
- plot.py: equity curves now show cap tiers with Alpaca costs (zero commission);
  HP sweep uses Alpaca cost decomposition; SPY line clamped to last strategy date
- db/models.py: TickerMeta table
- db/db.py: get_cached_market_caps, upsert_market_caps
- README: add --cap-tier to simulate docs; backfill note (~3 days for 2 years
  at SEC 10 req/s limit); remove duplicate setup block; remove em-dashes in prose;
  results table tilde estimates to be updated once cap-tier sims complete

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-26 18:10:09 +02:00

225 lines
7.6 KiB
Python

"""
Generate performance plots for the insider-copytrade strategy.

Usage:
    python main.py plot        # saves to plots/
    python backtest/plot.py    # same
"""
import logging
import os
import sys
from datetime import datetime
# Prepend the parent of this file's directory to sys.path. This must run
# before the project imports below so they resolve when the file is executed
# directly as a script.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
# NOTE(review): `config` is not referenced in the visible code; presumably
# imported for its side effects - confirm before removing.
import config
from backtest.simulate import Strategy, _load_all_prices, simulate
logger = logging.getLogger(__name__)
# Default output directory: "plots" under the parent of this file's directory.
PLOTS_DIR = os.path.join(os.path.dirname(os.path.dirname(__file__)), "plots")
def _get_matplotlib():
    """
    Lazily import the plotting stack and hand it back as a tuple.

    Returns:
        ``(matplotlib, matplotlib.pyplot, matplotlib.dates, numpy)``.

    Raises:
        ImportError: when matplotlib or numpy cannot be imported. The message
            is the install hint and the underlying import failure is chained
            as ``__cause__``.
    """
    try:
        import matplotlib
        import matplotlib.pyplot as plt
        import matplotlib.dates as mdates
        import numpy as np
    except ImportError as exc:
        # Chain explicitly so the traceback still names the module that failed.
        raise ImportError("pip install matplotlib numpy") from exc
    return matplotlib, plt, mdates, np
def plot_hp_heatmap(prices: dict, out_dir: str = PLOTS_DIR) -> str:
    """
    Sweep holding_days x round-trip cost and render two annotated heatmaps.

    One simulation is run per (holding period, round-trip cost) pair. The left
    panel shows the ``excess_return_pct`` reported by ``simulate``, the right
    panel the ``annualized_return_pct``. Every cell is annotated with its value.

    Args:
        prices: price cache, passed unchanged to ``simulate(..., prices=prices)``.
        out_dir: directory the PNG is written to; created if it does not exist.

    Returns:
        Path of the saved ``hp_sweep.png``.
    """
    _, plt, _, np = _get_matplotlib()
    # Imported once here instead of once per panel inside the loop below.
    from matplotlib.colors import TwoSlopeNorm

    hold_days = [3, 5, 7, 10, 14, 21, 30]
    rt_pcts = [0.3, 0.5, 0.7, 1.0, 1.2, 1.5, 2.0]
    # Centre of the right-hand colour scale; presumably an approximate SPY
    # annualised return in percent - confirm.
    spy_approx = 16.0

    # Alpaca: zero commission. Decompose RT into spread + slippage only (50/50).
    # roundtrip = 2*spread + slippage  =>  spread = RT*0.25, slippage = RT*0.5
    # verify: 2*0.25 + 0.5 = 1.0 * RT
    def _costs(rt):
        return dict(spread=rt * 0.25, slippage=rt * 0.5, commission=0)

    rows_excess = []
    rows_ann = []
    total = len(hold_days) * len(rt_pcts)
    done = 0
    for hd in hold_days:
        row_e, row_a = [], []
        for rt_pct in rt_pcts:
            # rt_pcts are percentages; _costs expects a fraction.
            strategy = Strategy(holding_days=hd, buy_delay=1, **_costs(rt_pct / 100.0))
            perf = simulate(strategy, prices=prices).get("performance", {})
            row_e.append(perf.get("excess_return_pct", 0.0))
            row_a.append(perf.get("annualized_return_pct", 0.0))
            done += 1
            logger.info(
                "[%s/%s] hold=%sd rt=%s%% ann=%.1f%% excess=%+.1f%%",
                done, total, hd, rt_pct, row_a[-1], row_e[-1],
            )
        rows_excess.append(row_e)
        rows_ann.append(row_a)

    fig, axes = plt.subplots(1, 2, figsize=(15, 6))
    # Each panel carries an explicit flag so behaviour does not depend on the
    # wording of its title.
    panels = [
        (axes[0], np.array(rows_excess), "Excess return vs SPY (annualised %)", True),
        (axes[1], np.array(rows_ann), "Strategy annualised return (%)", False),
    ]
    for ax, Z, title, is_excess in panels:
        if is_excess:
            # Symmetric scale around zero, at least +/-5 wide.
            vmax = float(max(abs(Z.max()), abs(Z.min()), 5))
            norm = TwoSlopeNorm(vmin=-vmax, vcenter=0, vmax=vmax)
        else:
            # Scale centred on spy_approx; the bounds are widened so that
            # vmin < vcenter < vmax always holds.
            norm = TwoSlopeNorm(
                vmin=min(float(Z.min()), -5),
                vcenter=spy_approx,
                vmax=max(float(Z.max()), spy_approx + 5),
            )
        im = ax.imshow(Z, cmap="RdYlGn", norm=norm, aspect="auto")
        fig.colorbar(im, ax=ax).set_label("%")
        ax.set_xticks(range(len(rt_pcts)))
        ax.set_xticklabels([f"{r}%" for r in rt_pcts], fontsize=9)
        ax.set_yticks(range(len(hold_days)))
        ax.set_yticklabels([f"{h}d" for h in hold_days], fontsize=9)
        ax.set_xlabel("Round-trip transaction cost")
        ax.set_ylabel("Holding period")
        ax.set_title(title, fontsize=11)
        for (i, j), val in np.ndenumerate(Z):
            txt = f"{val:+.1f}" if is_excess else f"{val:.1f}"
            # Position of the value on the normalised colour scale; white text
            # is used towards both ends of the scale, black in the middle band.
            brightness = norm(val)
            color = "white" if brightness < 0.35 or brightness > 0.75 else "black"
            ax.text(j, i, txt, ha="center", va="center", fontsize=7.5, color=color)

    fig.suptitle(
        "HP sweep: Alpaca (zero commission), 1-day entry delay, 10% position size, all cap tiers",
        fontsize=12,
    )
    fig.tight_layout()
    os.makedirs(out_dir, exist_ok=True)
    out = os.path.join(out_dir, "hp_sweep.png")
    # Save and close this specific figure rather than the implicit current one.
    fig.savefig(out, dpi=150, bbox_inches="tight")
    plt.close(fig)
    logger.info("Saved %s", out)
    return out
def plot_equity_curves(prices: dict, out_dir: str = PLOTS_DIR) -> str:
    """
    Plot portfolio equity curves for several cost scenarios vs SPY buy-and-hold.

    One simulation is run per scenario (7-day hold, 1-day buy delay, zero
    commission). Each curve is indexed to 100 at its first point. The SPY
    overlay is indexed to 100 at its first date in range and spans from the
    earliest reported period start to the latest end date of any plotted curve.

    Args:
        prices: price cache, passed unchanged to ``simulate(..., prices=prices)``;
            ``prices["SPY"]`` (date string -> price) feeds the overlay if present.
        out_dir: directory the PNG is written to; created if it does not exist.

    Returns:
        Path of the saved ``equity_curves.png``.
    """
    _, plt, mdates, _ = _get_matplotlib()

    # Alpaca zero-commission costs by cap tier (spread + slippage only).
    # NOTE(review): the "~X% RT" labels equal spread + slippage, whereas the
    # comment in plot_hp_heatmap states roundtrip = 2*spread + slippage (which
    # would give 0.3% / 0.75% / 1.2%). Confirm against simulate's cost model.
    scenarios = [
        {"label": "Large cap (~0.2% RT)", "cap_tier": "large", "spread": 0.001, "slippage": 0.001},
        {"label": "Mid cap (~0.5% RT)", "cap_tier": "mid", "spread": 0.0025, "slippage": 0.0025},
        {"label": "Small cap (~0.8% RT)", "cap_tier": "small", "spread": 0.004, "slippage": 0.004},
        {"label": "All tickers (0% RT)", "cap_tier": None, "spread": 0, "slippage": 0},
    ]
    colors = ["#2ecc71", "#3498db", "#e67e22", "#aaaaaa"]

    fig, ax = plt.subplots(figsize=(13, 7))
    period_starts = []
    curve_ends = []
    for sc, color in zip(scenarios, colors):
        strategy = Strategy(
            holding_days=7, buy_delay=1,
            spread=sc["spread"], slippage=sc["slippage"], commission=0,
            cap_tier=sc["cap_tier"],
        )
        result = simulate(strategy, prices=prices)
        curve = result.get("equity_curve", [])
        if not curve:
            continue
        start = result["period"]["start"]
        if start:
            period_starts.append(start)
        curve_ends.append(curve[-1][0])
        dates = [datetime.strptime(d, "%Y-%m-%d") for d, _ in curve]
        values = [v for _, v in curve]
        base = values[0]
        ax.plot(dates, [v / base * 100 for v in values],
                label=sc["label"], color=color, linewidth=1.8)

    # Use the extremes across ALL plotted curves, not whichever scenario was
    # simulated last. Dates are "%Y-%m-%d" strings, so min/max on the strings
    # order them chronologically.
    sim_start = min(period_starts, default=None)
    last_curve_date = max(curve_ends, default=None)

    # SPY buy-and-hold overlay, clamped to the last data point of the strategy curves.
    spy_px = prices.get("SPY", {})
    if spy_px and sim_start and last_curve_date:
        spy_dates = sorted(d for d in spy_px if sim_start <= d <= last_curve_date)
        if spy_dates:
            base = spy_px[spy_dates[0]]
            ax.plot(
                [datetime.strptime(d, "%Y-%m-%d") for d in spy_dates],
                [spy_px[d] / base * 100 for d in spy_dates],
                label="SPY buy & hold", color="black", linewidth=2.2, linestyle="--",
            )

    ax.axhline(100, color="gray", linewidth=0.8, linestyle=":")
    ax.set_xlabel("Date", fontsize=11)
    ax.set_ylabel("Portfolio value (indexed to 100)", fontsize=11)
    ax.set_title(
        "Insider Copytrade: equity curves by cap tier, Alpaca costs (7d hold, 1d delay, 10% position size)",
        fontsize=12,
    )
    ax.legend(fontsize=10)
    ax.grid(True, alpha=0.25)
    ax.xaxis.set_major_formatter(mdates.DateFormatter("%Y-%m"))
    ax.xaxis.set_major_locator(mdates.MonthLocator(interval=6))
    plt.xticks(rotation=30)
    fig.tight_layout()
    os.makedirs(out_dir, exist_ok=True)
    out = os.path.join(out_dir, "equity_curves.png")
    # Save and close this specific figure rather than the implicit current one.
    fig.savefig(out, dpi=150, bbox_inches="tight")
    plt.close(fig)
    logger.info("Saved %s", out)
    return out
def main():
    """
    Command-line entry point: set up logging, initialise the database, load
    the price cache, render both plots and print where they were saved.
    """
    log_format = "%(asctime)s [%(levelname)s] %(name)s: %(message)s"
    logging.basicConfig(level=logging.INFO, format=log_format)

    # Imported here rather than at module level, after logging is configured.
    from db.db import init_db

    init_db()

    logger.info("Loading price cache...")
    prices = _load_all_prices()

    # (log line announced before the step, plotting function to run)
    steps = (
        ("Generating HP heatmap (49 simulations)...", plot_hp_heatmap),
        ("Generating equity curves (4 simulations)...", plot_equity_curves),
    )
    saved_paths = []
    for announcement, make_plot in steps:
        logger.info(announcement)
        saved_paths.append(make_plot(prices))

    heatmap_path, curves_path = saved_paths
    print(f"\nPlots saved:\n {heatmap_path}\n {curves_path}\n")
# Script entry point: lets the module be run directly (python backtest/plot.py).
if __name__ == "__main__":
    main()