# smaug/backtest/plot.py

"""
Generate performance plots for the insider-copytrade strategy.

    python main.py plot              # saves to plots/
    python backtest/plot.py          # same
"""

import logging
import os
import sys
from datetime import datetime

sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))

import config
from backtest.simulate import Strategy, _load_all_prices, _fetch_market_caps, simulate
from db.db import get_signals_for_backtest

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Default output directory for every plot function below: a "plots" folder
# located one level above the directory that contains this file.
PLOTS_DIR = os.path.join(os.path.dirname(os.path.dirname(__file__)), "plots")


def _get_matplotlib():
    try:
        import matplotlib
        import matplotlib.pyplot as plt
        import matplotlib.dates as mdates
        import numpy as np
        return matplotlib, plt, mdates, np
    except ImportError:
        raise ImportError("pip install matplotlib numpy")


def plot_hp_heatmap(prices: dict, out_dir: str = PLOTS_DIR, signals=None, market_caps=None) -> str:
    """
    6-panel heatmap: one panel per cap tier (+ theoretical + all-cap).
    Axes: holding_days (rows) x buy_delay (cols).
    Color: the ``excess_return_pct`` reported by ``simulate`` for that cell
    (labelled on the colorbar as annualised excess return vs SPY).

    All panels share one symmetric colour scale centred on zero so they can be
    compared directly.  The scale is derived from finite cells only, so a
    single NaN/inf result cannot corrupt the scale of every panel.

    Args:
        prices: price cache, forwarded to ``simulate`` as ``prices``.
        out_dir: directory the PNG is written to; created if missing.
        signals: optional pre-fetched signals, forwarded as ``_signals``.
        market_caps: optional pre-fetched market caps, forwarded as ``_market_caps``.

    Returns:
        Path of the written ``hp_sweep.png``.
    """
    _, plt, _, np = _get_matplotlib()
    from matplotlib.colors import TwoSlopeNorm

    hold_days  = [3, 5, 7, 10, 14, 21, 30]
    buy_delays = [0, 1, 2, 3]

    # Cap tier definitions: (label, cap_tier, spread, slippage)
    # Costs based on SEC small-cap liquidity study (2013), Nasdaq spread data (2021),
    # and Frazzini/Israel/Moskowitz "Trading Costs" (AQR, 2018).
    # Alpaca charges zero commission. OTC/Pink Sheet stocks cannot be opened on Alpaca
    # (close-only), so micro-cap signals overlap heavily with untradeable names.
    tiers = [
        ("Theoretical (0% RT, all)",       None,    0.000,  0.000),
        ("All cap (~1% RT)",               None,    0.003,  0.004),
        ("Large cap (~0.2% RT)",           "large", 0.0005, 0.001),
        ("Mid cap (~0.5% RT)",             "mid",   0.0015, 0.002),
        ("Small cap (~1.5% RT)",           "small", 0.005,  0.005),
        ("Micro cap (~5% RT, if listed)",  "micro", 0.015,  0.020),
    ]

    total = len(tiers) * len(hold_days) * len(buy_delays)
    done  = 0
    tier_matrices = []

    # One simulation per (tier, holding period, entry delay) combination.
    for label, cap_tier, spread, slippage in tiers:
        rows = []
        for hd in hold_days:
            row = []
            for delay in buy_delays:
                s = Strategy(
                    holding_days=hd, buy_delay=delay,
                    spread=spread, slippage=slippage, commission=0,
                    cap_tier=cap_tier,
                )
                r = simulate(s, prices=prices, _signals=signals, _market_caps=market_caps)
                # Missing performance data is shown as 0.0 rather than aborting the sweep.
                excess = r.get("performance", {}).get("excess_return_pct", 0.0)
                row.append(excess)
                done += 1
                print(f"  [{done}/{total}] {label}  hold={hd}d delay={delay}d  excess={excess:+.1f}%", flush=True)
            rows.append(row)
        tier_matrices.append((label, np.asarray(rows, dtype=float)))

    # Global color scale so all panels are comparable.  Only finite cells take
    # part: max() over a NaN would otherwise yield NaN and break the norm.
    # The floor of 10 keeps near-zero sweeps from saturating the colormap.
    all_vals = np.concatenate([grid.ravel() for _, grid in tier_matrices])
    finite_vals = all_vals[np.isfinite(all_vals)]
    peak = float(np.abs(finite_vals).max()) if finite_vals.size else 0.0
    vmax = max(peak, 10.0)
    norm = TwoSlopeNorm(vmin=-vmax, vcenter=0, vmax=vmax)

    fig, axes = plt.subplots(2, 3, figsize=(16, 9))

    for ax, (label, grid) in zip(axes.flatten(), tier_matrices):
        ax.imshow(grid, cmap="RdYlGn", norm=norm, aspect="auto")

        ax.set_xticks(range(len(buy_delays)))
        ax.set_xticklabels([f"{d}d" for d in buy_delays], fontsize=9)
        ax.set_yticks(range(len(hold_days)))
        ax.set_yticklabels([f"{h}d" for h in hold_days], fontsize=9)
        ax.set_xlabel("Entry delay", fontsize=9)
        ax.set_ylabel("Holding period", fontsize=9)
        ax.set_title(label, fontsize=10, fontweight="bold")

        # Annotate each cell; use white text on the dark ends of the colormap.
        for (i, j), val in np.ndenumerate(grid):
            brightness = norm(val)
            color = "white" if brightness < 0.3 or brightness > 0.75 else "black"
            ax.text(j, i, f"{val:+.1f}", ha="center", va="center",
                    fontsize=8, color=color)

    fig.suptitle(
        "HP sweep: holding period x entry delay, by cap tier  (Alpaca, zero commission)",
        fontsize=13,
    )
    fig.tight_layout(rect=[0, 0, 0.88, 1])

    # Shared colorbar in reserved right margin — avoids overlapping panels
    cbar_ax = fig.add_axes([0.905, 0.15, 0.018, 0.65])
    fig.colorbar(
        plt.cm.ScalarMappable(norm=norm, cmap="RdYlGn"),
        cax=cbar_ax, label="Annualised excess return vs SPY (%)",
    )

    os.makedirs(out_dir, exist_ok=True)
    out = os.path.join(out_dir, "hp_sweep.png")
    # Save/close through the figure handle instead of pyplot's implicit
    # "current figure", so this cannot touch a figure opened elsewhere.
    fig.savefig(out, dpi=150, bbox_inches="tight")
    plt.close(fig)
    logger.info("Saved %s", out)
    return out


def plot_equity_curves(prices: dict, out_dir: str = PLOTS_DIR, signals=None, market_caps=None) -> str:
    """
    Draw one indexed equity curve per cap tier next to an SPY buy-and-hold line.

    Each tier is simulated with a 7-day hold and a 1-day entry delay under its
    own spread/slippage assumptions.  Every curve is rebased so that its first
    plotted point equals 100, and all curves (SPY included) stop at the
    earliest final date found among the simulated tiers.

    Args:
        prices: price cache, forwarded to ``simulate``; its ``"SPY"`` entry
            (a pair of dates and closes) feeds the benchmark line.
        out_dir: directory the PNG is written to; created if missing.
        signals: optional pre-fetched signals, forwarded as ``_signals``.
        market_caps: optional pre-fetched market caps, forwarded as ``_market_caps``.

    Returns:
        Path of the written ``equity_curves.png``.
    """
    matplotlib, plt, mdates, np = _get_matplotlib()

    def as_datetime(day):
        # Curve and price dates are "YYYY-MM-DD" strings.
        return datetime.strptime(day, "%Y-%m-%d")

    # Realistic Alpaca costs by cap tier (zero commission, spread + slippage only).
    # Sources: SEC small-cap liquidity study (2013); Nasdaq spread data (2021);
    # Frazzini/Israel/Moskowitz "Trading Costs" AQR (2018).
    # Micro-cap: Alpaca does not allow new positions in OTC/Pink Sheet stocks — most
    # micro-cap names fall in this category and are simply not tradeable.
    # Columns: legend label, cap tier, spread, slippage, line colour.
    tier_specs = [
        ("Large cap  (~0.2% RT)",          "large", 0.0005, 0.001, "#2ecc71"),
        ("Mid cap    (~0.5% RT)",          "mid",   0.0015, 0.002, "#3498db"),
        ("Small cap  (~1.5% RT)",          "small", 0.005,  0.005, "#e67e22"),
        ("Micro cap  (~5% RT, if listed)", "micro", 0.015,  0.020, "#e74c3c"),
    ]

    fig, ax = plt.subplots(figsize=(13, 7))

    sim_start = None   # period start of the first tier that produced a curve
    cutoff = None      # earliest final date across tiers; everything is clipped here

    # Pass 1: run every simulation and work out the common end date.
    simulated = []
    for label, tier, spread, slippage, colour in tier_specs:
        strategy = Strategy(
            holding_days=7, buy_delay=1,
            spread=spread, slippage=slippage, commission=0,
            cap_tier=tier,
        )
        print(f"  equity curve: {label}...", flush=True)
        result = simulate(strategy, prices=prices, _signals=signals, _market_caps=market_caps)
        points = result.get("equity_curve", [])
        simulated.append((label, colour, points))
        if not points:
            continue
        if not sim_start:
            sim_start = result["period"]["start"]
        final_day = points[-1][0]
        if not cutoff or final_day < cutoff:
            cutoff = final_day

    # Pass 2: clip each curve to the common end date, rebase to 100, and plot.
    for label, colour, points in simulated:
        clipped = [(day, value) for day, value in points if day <= cutoff]
        if not clipped:
            continue
        xs = [as_datetime(day) for day, _ in clipped]
        first_value = clipped[0][1]
        ys = [value / first_value * 100 for _, value in clipped]
        ax.plot(xs, ys, label=label, color=colour, linewidth=1.8)

    # SPY buy-and-hold overlay — clamp to last data point of strategy curves
    spy_entry = prices.get("SPY")
    if spy_entry and spy_entry[0] and sim_start and cutoff:
        spy_days, spy_closes = spy_entry
        window = [
            (day, close)
            for day, close in zip(spy_days, spy_closes)
            if sim_start <= day <= cutoff
        ]
        if window:
            spy_base = window[0][1]
            spy_xs = [as_datetime(day) for day, _ in window]
            spy_ys = [close / spy_base * 100 for _, close in window]
            ax.plot(
                spy_xs, spy_ys,
                label="SPY buy & hold", color="black", linewidth=2.2, linestyle="--",
            )

    # Reference line at the common starting index.
    ax.axhline(100, color="gray", linewidth=0.8, linestyle=":")
    ax.set_xlabel("Date", fontsize=11)
    ax.set_ylabel("Portfolio value (indexed to 100)", fontsize=11)
    ax.set_title(
        "Insider Copytrade: equity curves by cap tier, Alpaca costs  (7d hold, 1d delay, 10% position size)",
        fontsize=12,
    )
    ax.legend(fontsize=10)
    ax.grid(True, alpha=0.25)
    ax.xaxis.set_major_formatter(mdates.DateFormatter("%Y-%m"))
    ax.xaxis.set_major_locator(mdates.MonthLocator(interval=6))
    plt.xticks(rotation=30)

    plt.tight_layout()
    os.makedirs(out_dir, exist_ok=True)
    out = os.path.join(out_dir, "equity_curves.png")
    plt.savefig(out, dpi=150, bbox_inches="tight")
    plt.close()
    logger.info(f"Saved {out}")
    return out


def plot_position_size(prices: dict, out_dir: str = PLOTS_DIR, signals=None, market_caps=None) -> str:
    """
    Line chart: annualised return vs position size for each cap tier.
    Shows whether 10% is conservative or optimal.

    Every (tier, position size) pair is simulated with a 7-day hold and a
    1-day entry delay.  A horizontal dashed line marks the SPY annualised
    return taken from the first simulation's performance block, and a
    vertical dotted line marks the 10% position size.

    Args:
        prices: price cache, forwarded to ``simulate`` as ``prices``.
        out_dir: directory the PNG is written to; created if missing.
        signals: optional pre-fetched signals, forwarded as ``_signals``.
        market_caps: optional pre-fetched market caps, forwarded as ``_market_caps``.

    Returns:
        Path of the written ``position_size.png``.
    """
    _, plt, _, _ = _get_matplotlib()

    pos_sizes = [0.03, 0.05, 0.07, 0.10, 0.15, 0.20, 0.25]

    # (legend label, cap_tier, spread, slippage)
    tiers = [
        ("Large (~0.2% RT)",          "large", 0.0005, 0.001),
        ("Mid (~0.5% RT)",            "mid",   0.0015, 0.002),
        ("Small (~1.5% RT)",          "small", 0.005,  0.005),
        ("Micro (~5% RT, if listed)", "micro", 0.015,  0.020),
    ]

    colors = ["#2ecc71", "#3498db", "#e67e22", "#e74c3c"]

    fig, ax = plt.subplots(figsize=(10, 6))

    spy_ann = None
    total = len(tiers) * len(pos_sizes)
    done = 0

    for (label, cap_tier, spread, slippage), color in zip(tiers, colors):
        ann_returns = []
        for ps in pos_sizes:
            s = Strategy(
                holding_days=7, buy_delay=1,
                spread=spread, slippage=slippage, commission=0,
                cap_tier=cap_tier, position_size=ps,
            )
            r = simulate(s, prices=prices, _signals=signals, _market_caps=market_caps)
            perf = r.get("performance", {})
            ann_returns.append(perf.get("annualized_return_pct", 0.0))
            if spy_ann is None:
                # NOTE(review): 16.0 is a hard-coded placeholder used when the
                # simulation reports no SPY figure — confirm it is still wanted.
                spy_ann = perf.get("spy_annualized_pct", 16.0)
            done += 1
            print(f"  [{done}/{total}] {label} pos={ps:.0%} ann={ann_returns[-1]:.1f}%", flush=True)

        # X axis is in percent, so scale the fractional position sizes.
        ax.plot([p * 100 for p in pos_sizes], ann_returns,
                label=label, color=color, linewidth=2, marker="o", markersize=5)

    if spy_ann is not None:
        ax.axhline(spy_ann, color="black", linewidth=1.8, linestyle="--",
                   label=f"SPY buy & hold ({spy_ann:.1f}%)")

    # Mark the 10% position size with a vertical guide and a small caption.
    ax.axvline(10, color="gray", linewidth=1, linestyle=":", alpha=0.7)
    ax.text(10.3, ax.get_ylim()[0] + 1, "default\n(10%)", fontsize=8, color="gray")

    ax.set_xlabel("Position size (% of available cash per signal)", fontsize=11)
    ax.set_ylabel("Annualised return (%)", fontsize=11)
    ax.set_title(
        "Position size sensitivity by cap tier  (7d hold, 1d delay, Alpaca costs)",
        fontsize=12,
    )
    ax.legend(fontsize=10)
    ax.grid(True, alpha=0.25)

    fig.tight_layout()
    os.makedirs(out_dir, exist_ok=True)
    out = os.path.join(out_dir, "position_size.png")
    # Save/close through the figure handle instead of pyplot's implicit
    # "current figure", so this cannot touch a figure opened elsewhere.
    fig.savefig(out, dpi=150, bbox_inches="tight")
    plt.close(fig)
    logger.info("Saved %s", out)
    return out


def main():
    """
    Entry point: load the shared backtest inputs once, render all three plots,
    and print the paths of the files that were written.
    """
    logging.basicConfig(
        format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
        level=logging.INFO,
    )

    # Function-scope import; the database is initialised before anything is read.
    from db.db import init_db
    init_db()

    logger.info("Loading price cache...")
    prices = _load_all_prices()

    logger.info("Pre-fetching signals and market caps...")
    # NOTE(review): the meaning of the (0.0, 1) arguments is defined by
    # get_signals_for_backtest — presumably the loosest filter; confirm there.
    signals = get_signals_for_backtest(0.0, 1)
    unique_tickers = set()
    for sig in signals:
        unique_tickers.add(sig["ticker"])
    market_caps = _fetch_market_caps(list(unique_tickers))
    logger.info(f"  {len(signals)} signals, {len(market_caps)} market caps cached")

    # Every plot reuses the same pre-fetched inputs, passed by keyword.
    shared = {"signals": signals, "market_caps": market_caps}
    stages = (
        ("Generating HP heatmap (168 simulations)...", plot_hp_heatmap),
        ("Generating equity curves (4 simulations)...", plot_equity_curves),
        ("Generating position size sensitivity (28 simulations)...", plot_position_size),
    )

    saved_paths = []
    for announcement, render in stages:
        logger.info(announcement)
        saved_paths.append(render(prices, **shared))

    listing = "".join(f"  {path}\n" for path in saved_paths)
    print(f"\nPlots saved:\n{listing}")


# Run the full plot pipeline when executed as a script.
if __name__ == "__main__":
    main()