# smaug/backtest/plot.py

"""
Generate performance plots for the insider-copytrade strategy.

    python main.py plot              # saves to plots/
    python backtest/plot.py          # same
"""

import logging
import os
import sys
from datetime import datetime

# Put the parent of this file's directory at the front of sys.path so the
# project-level imports below resolve when this file is executed directly
# (see the "python backtest/plot.py" usage in the module docstring).
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))

# NOTE(review): `config` is not referenced in the code visible in this file;
# presumably imported for its side effects — confirm before removing.
import config
from backtest.simulate import Strategy, _load_all_prices, simulate

logger = logging.getLogger(__name__)

# Default output directory: a "plots" folder next to this file's parent
# directory (i.e. one level above the directory containing this file).
PLOTS_DIR = os.path.join(os.path.dirname(os.path.dirname(__file__)), "plots")


def _get_matplotlib():
    """
    Import the plotting stack lazily and return it as a tuple.

    The imports are done here, not at this module's top level, so the
    plotting dependencies are only needed when a plot is actually drawn.

    Returns:
        (matplotlib, matplotlib.pyplot, matplotlib.dates, numpy), in that order.

    Raises:
        ImportError: if any of the packages cannot be imported. The original
            failure is attached as ``__cause__`` so the traceback shows which
            module was actually missing.
    """
    try:
        import matplotlib
        import matplotlib.pyplot as plt
        import matplotlib.dates as mdates
        import numpy as np
    except ImportError as err:
        # Chain explicitly: the short install hint alone does not say which
        # of the imports failed.
        raise ImportError("pip install matplotlib numpy") from err
    return matplotlib, plt, mdates, np


def plot_hp_heatmap(prices: dict, out_dir: str = PLOTS_DIR) -> str:
    """
    Sweep holding_days x round-trip cost and save the results as two heatmaps.

    One simulation is run per (holding period, round-trip cost) pair. The
    figure has two panels sharing the same grid:

      * left:  ``performance["excess_return_pct"]`` (colour scale centred on 0)
      * right: ``performance["annualized_return_pct"]`` (colour scale centred
        on a hard-coded SPY reference)

    Every cell is annotated with its own value. A metric missing from a
    simulation result is plotted as 0.0.

    Args:
        prices: price cache passed straight through to ``simulate``.
        out_dir: directory for the PNG; created if it does not exist.

    Returns:
        Path of the written file (``<out_dir>/hp_sweep.png``).
    """
    _, plt, _, np = _get_matplotlib()
    # Imported locally, in keeping with this module's lazy matplotlib imports.
    from matplotlib.colors import TwoSlopeNorm

    hold_days = [3, 5, 7, 10, 14, 21, 30]
    rt_pcts = [0.3, 0.5, 0.7, 1.0, 1.2, 1.5, 2.0]

    # Alpaca: zero commission. Decompose RT into spread + slippage only (50/50).
    # roundtrip = 2*spread + slippage  =>  spread = RT*0.25, slippage = RT*0.5
    # verify: 2*0.25 + 0.5 = 1.0 * RT
    # NOTE(review): this assumes the simulator charges spread on both legs and
    # slippage once — confirm against backtest.simulate.
    def _costs(rt):
        return dict(spread=rt * 0.25, slippage=rt * 0.5, commission=0)

    total = len(hold_days) * len(rt_pcts)
    done = 0
    rows_excess = []
    rows_ann = []

    for hd in hold_days:
        row_e, row_a = [], []
        for rt_pct in rt_pcts:
            # rt_pcts are in percent; the strategy takes fractions.
            strategy = Strategy(holding_days=hd, buy_delay=1, **_costs(rt_pct / 100.0))
            perf = simulate(strategy, prices=prices).get("performance", {})
            excess = perf.get("excess_return_pct", 0.0)
            ann = perf.get("annualized_return_pct", 0.0)
            row_e.append(excess)
            row_a.append(ann)
            done += 1
            logger.info(
                "[%d/%d] hold=%dd rt=%s%% ann=%.1f%% excess=%+.1f%%",
                done, total, hd, rt_pct, ann, excess,
            )
        rows_excess.append(row_e)
        rows_ann.append(row_a)

    Z_excess = np.array(rows_excess)
    Z_ann = np.array(rows_ann)

    # Excess panel: symmetric scale around zero, at least +/-5 wide so a flat
    # grid still gets a usable colour range.
    excess_span = float(max(abs(Z_excess.max()), abs(Z_excess.min()), 5))
    excess_norm = TwoSlopeNorm(vmin=-excess_span, vcenter=0, vmax=excess_span)

    # Annualised panel: centred on a hard-coded SPY reference value (%).
    # NOTE(review): 16.0 is a fixed approximation — confirm it still matches
    # the backtest window.
    spy_approx = 16.0
    ann_norm = TwoSlopeNorm(
        vmin=min(float(Z_ann.min()), -5),
        vcenter=spy_approx,
        vmax=max(float(Z_ann.max()), spy_approx + 5),
    )

    fig, axes = plt.subplots(1, 2, figsize=(15, 6))

    # (axis, data, title, colour norm, cell-annotation format)
    panels = [
        (axes[0], Z_excess, "Excess return vs SPY (annualised %)", excess_norm, "{:+.1f}"),
        (axes[1], Z_ann, "Strategy annualised return (%)", ann_norm, "{:.1f}"),
    ]

    for ax, Z, title, norm, cell_fmt in panels:
        im = ax.imshow(Z, cmap="RdYlGn", norm=norm, aspect="auto")
        cb = plt.colorbar(im, ax=ax)
        cb.set_label("%")

        ax.set_xticks(range(len(rt_pcts)))
        ax.set_xticklabels([f"{r}%" for r in rt_pcts], fontsize=9)
        ax.set_yticks(range(len(hold_days)))
        ax.set_yticklabels([f"{h}d" for h in hold_days], fontsize=9)
        ax.set_xlabel("Round-trip transaction cost")
        ax.set_ylabel("Holding period")
        ax.set_title(title, fontsize=11)

        # Rows are holding periods (y), columns are costs (x).
        for (i, j), val in np.ndenumerate(Z):
            # Cells near either end of the colormap are dark: use white text.
            brightness = norm(val)
            color = "white" if brightness < 0.35 or brightness > 0.75 else "black"
            ax.text(j, i, cell_fmt.format(val), ha="center", va="center",
                    fontsize=7.5, color=color)

    # NOTE(review): "10% position size, all cap tiers" are not set in this
    # function — presumably Strategy defaults; confirm.
    fig.suptitle(
        "HP sweep: Alpaca (zero commission), 1-day entry delay, 10% position size, all cap tiers",
        fontsize=12,
    )
    fig.tight_layout()

    # Save and close via the explicit figure handle instead of pyplot's
    # "current figure", and release the figure even if saving fails.
    try:
        os.makedirs(out_dir, exist_ok=True)
        out = os.path.join(out_dir, "hp_sweep.png")
        fig.savefig(out, dpi=150, bbox_inches="tight")
    finally:
        plt.close(fig)
    logger.info("Saved %s", out)
    return out


def plot_equity_curves(prices: dict, out_dir: str = PLOTS_DIR) -> str:
    """
    Plot portfolio equity curves for several cost scenarios vs SPY buy-and-hold.

    One simulation is run per scenario (7-day hold, 1-day buy delay, zero
    commission). Each curve is re-based to 100 at its own first point. SPY is
    overlaid from the first plotted scenario's ``period["start"]`` to the
    latest end date among all plotted curves, re-based to 100 at its first
    date in that window. Scenarios with an empty equity curve are skipped.

    Args:
        prices: price cache passed to ``simulate``; ``prices["SPY"]`` (a
            mapping of "YYYY-MM-DD" date string to price) feeds the overlay
            when present.
        out_dir: directory for the PNG; created if it does not exist.

    Returns:
        Path of the written file (``<out_dir>/equity_curves.png``).
    """
    _, plt, mdates, _ = _get_matplotlib()

    # Alpaca zero-commission costs by cap tier (spread + slippage only)
    # NOTE(review): the "~x% RT" labels do not match the 2*spread + slippage
    # round-trip model used in plot_hp_heatmap (e.g. 2*0.001 + 0.001 = 0.3%,
    # not 0.2%) — confirm which cost model the simulator applies.
    scenarios = [
        {"label": "Large cap  (~0.2% RT)", "cap_tier": "large", "spread": 0.001,  "slippage": 0.001},
        {"label": "Mid cap    (~0.5% RT)", "cap_tier": "mid",   "spread": 0.0025, "slippage": 0.0025},
        {"label": "Small cap  (~0.8% RT)", "cap_tier": "small", "spread": 0.004,  "slippage": 0.004},
        {"label": "All tickers (0% RT)",   "cap_tier": None,    "spread": 0,      "slippage": 0},
    ]
    colors = ["#2ecc71", "#3498db", "#e67e22", "#aaaaaa"]

    fig, ax = plt.subplots(figsize=(13, 7))

    sim_start = None
    last_curve_date = None

    for sc, color in zip(scenarios, colors):
        strategy = Strategy(
            holding_days=7, buy_delay=1,
            spread=sc["spread"], slippage=sc["slippage"], commission=0,
            cap_tier=sc["cap_tier"],
        )
        result = simulate(strategy, prices=prices)
        curve = result.get("equity_curve", [])
        if not curve:
            continue

        sim_start = sim_start or result["period"]["start"]

        # Track the latest end date across ALL plotted curves. Overwriting it
        # per scenario would clamp the SPY overlay to whichever curve happened
        # to be plotted last. ISO "YYYY-MM-DD" strings sort chronologically.
        curve_end = curve[-1][0]
        if last_curve_date is None or curve_end > last_curve_date:
            last_curve_date = curve_end

        dates = [datetime.strptime(d, "%Y-%m-%d") for d, _ in curve]
        values = [v for _, v in curve]
        base = values[0]
        ax.plot(dates, [v / base * 100 for v in values],
                label=sc["label"], color=color, linewidth=1.8)

    # SPY buy-and-hold overlay — clamp to last data point of strategy curves
    spy_px = prices.get("SPY", {})
    if spy_px and sim_start and last_curve_date:
        spy_dates = sorted(d for d in spy_px if sim_start <= d <= last_curve_date)
        if spy_dates:
            base = spy_px[spy_dates[0]]
            ax.plot(
                [datetime.strptime(d, "%Y-%m-%d") for d in spy_dates],
                [spy_px[d] / base * 100 for d in spy_dates],
                label="SPY buy & hold", color="black", linewidth=2.2, linestyle="--",
            )

    # Reference line at the common starting index.
    ax.axhline(100, color="gray", linewidth=0.8, linestyle=":")
    ax.set_xlabel("Date", fontsize=11)
    ax.set_ylabel("Portfolio value (indexed to 100)", fontsize=11)
    # NOTE(review): "10% position size" is not set in this function —
    # presumably a Strategy default; confirm.
    ax.set_title(
        "Insider Copytrade: equity curves by cap tier, Alpaca costs  (7d hold, 1d delay, 10% position size)",
        fontsize=12,
    )
    ax.legend(fontsize=10)
    ax.grid(True, alpha=0.25)
    ax.xaxis.set_major_formatter(mdates.DateFormatter("%Y-%m"))
    ax.xaxis.set_major_locator(mdates.MonthLocator(interval=6))
    plt.xticks(rotation=30)

    fig.tight_layout()

    # Save and close via the explicit figure handle instead of pyplot's
    # "current figure", and release the figure even if saving fails.
    try:
        os.makedirs(out_dir, exist_ok=True)
        out = os.path.join(out_dir, "equity_curves.png")
        fig.savefig(out, dpi=150, bbox_inches="tight")
    finally:
        plt.close(fig)
    logger.info("Saved %s", out)
    return out


def main():
    """
    Script entry point: set up logging, initialise the DB, load the price
    cache once, render both plots, and print where they were written.
    """
    log_format = "%(asctime)s [%(levelname)s] %(name)s: %(message)s"
    logging.basicConfig(level=logging.INFO, format=log_format)

    # Imported here (after logging is configured) rather than at module top.
    from db.db import init_db

    init_db()

    logger.info("Loading price cache...")
    price_cache = _load_all_prices()

    # The same price cache is shared by both plots so it is loaded only once.
    logger.info("Generating HP heatmap (49 simulations)...")
    heatmap_path = plot_hp_heatmap(price_cache)

    logger.info("Generating equity curves (4 simulations)...")
    curves_path = plot_equity_curves(price_cache)

    print(f"\nPlots saved:\n  {heatmap_path}\n  {curves_path}\n")


# Run the plots only when this file is executed as a script
# ("python backtest/plot.py"), not when it is imported.
if __name__ == "__main__":
    main()