smaug/backtest/backtest.py
Claude 7e9221a914 feat: add PLAN.md and insider copytrade POC implementation
- PLAN.md: full implementation plan from issue
- config.py: configurable thresholds, API keys via .env
- ingestion/: EDGAR RSS poller + Form 4 XML parser
- db/: SQLite schema + interface (WAL mode)
- signals/: filter engine (buy/10b5-1/value/role) + cluster detector
- alerts/: Slack webhook alert with score gating
- broker/: Alpaca paper/live trade execution
- backtest/: historical signal backtesting with yfinance
- main.py: CLI entrypoint (run | fetch-once | backtest)
2026-05-04 16:15:22 +00:00

148 lines
4.8 KiB
Python

import logging
from datetime import datetime, timedelta
from typing import Optional
import sqlite3
import config
logger = logging.getLogger(__name__)
def _load_signals_from_db(db_path: str) -> list[dict]:
    """Load candidate signals from the SQLite database as plain dicts.

    Every column of ``signals`` is returned together with the ``role`` of a
    filing that shares the signal's ticker and trigger date (``None`` when no
    filing matches, because the join is a LEFT JOIN). Only signals with
    ``cluster_size >= 1`` are selected.

    Args:
        db_path: Filesystem path of the SQLite database.

    Returns:
        One dict per result row, keyed by column name.

    Raises:
        sqlite3.Error: If the database cannot be opened or queried.
    """
    conn = sqlite3.connect(db_path)
    try:
        # sqlite3.Row lets each row be converted to a dict keyed by column name.
        conn.row_factory = sqlite3.Row
        # NOTE(review): if several filings share a signal's ticker and date,
        # the join yields one row per filing, so that signal appears more
        # than once - confirm whether callers expect that.
        rows = conn.execute(
            "SELECT s.*, f.role FROM signals s "
            "LEFT JOIN filings f ON f.ticker = s.ticker AND f.transaction_date = s.trigger_date "
            "WHERE s.cluster_size >= 1"
        ).fetchall()
        return [dict(r) for r in rows]
    finally:
        # Close explicitly even when the query raises; using the connection
        # as a context manager would only manage the transaction.
        conn.close()
def _closing_prices(frame):
    """Return the ``Close`` column of a yfinance frame as a 1-D, NaN-free series."""
    closes = frame["Close"]
    # yf.download may return MultiIndex columns (field, ticker) even for a
    # single symbol, in which case "Close" is itself a one-column DataFrame.
    if getattr(closes, "ndim", 1) == 2:
        closes = closes.iloc[:, 0]
    closes = closes.dropna()
    # Compare against naive datetimes below, so drop any timezone on the index.
    if getattr(closes.index, "tz", None) is not None:
        closes = closes.tz_localize(None)
    return closes


def _holding_period_return(yf, symbol, entry_date, exit_date):
    """Return the close-to-close return of *symbol* between two dates.

    The entry price is the first close on or after ``entry_date`` and the exit
    price is the first close on or after ``exit_date``. Returns ``None`` when
    either price is unavailable (no data, or the exit date is not covered by
    the downloaded prices yet).
    """
    frame = yf.download(
        symbol,
        start=entry_date.strftime("%Y-%m-%d"),
        # Pad the end so that an exit date falling on a weekend or holiday
        # still has a following trading day inside the window.
        end=(exit_date + timedelta(days=5)).strftime("%Y-%m-%d"),
        progress=False,
        auto_adjust=True,
    )
    if frame is None or frame.empty:
        return None

    closes = _closing_prices(frame)
    from_entry = closes[closes.index >= entry_date]
    from_exit = closes[closes.index >= exit_date]
    if from_entry.empty or from_exit.empty:
        return None

    entry_price = float(from_entry.iloc[0])
    exit_price = float(from_exit.iloc[0])
    if entry_price == 0:
        return None
    return (exit_price - entry_price) / entry_price


def run_backtest(
    db_path: Optional[str] = None,
    holding_days: Optional[int] = None,
    min_score: float = 0.0,
    min_cluster_size: int = 1,
) -> dict:
    """Backtest stored signals against historical prices from yfinance.

    For every signal that passes the score and cluster-size filters, the
    return of the ticker over the holding period is compared with the return
    of SPY over the same period.

    Args:
        db_path: SQLite database path; falls back to ``config.DB_PATH``.
        holding_days: Holding period in calendar days; falls back to
            ``config.HOLDING_PERIOD_DAYS``.
        min_score: Minimum signal ``score`` (inclusive).
        min_cluster_size: Minimum signal ``cluster_size`` (inclusive).

    Returns:
        ``{}`` when no signal matches the filters, ``{"error": ...}`` when the
        holding period is invalid or no return could be computed, otherwise a
        summary dict with ``total_signals``, ``win_rate``, ``avg_return``,
        ``avg_alpha_vs_spy``, ``sharpe_ratio``, ``holding_days`` and the
        per-signal ``results`` list.

    Raises:
        ImportError: If yfinance is not installed.
    """
    import math

    try:
        import yfinance as yf
    except ImportError as err:
        raise ImportError("yfinance not installed. Run: pip install yfinance") from err

    db_path = db_path or config.DB_PATH
    holding_days = holding_days or config.HOLDING_PERIOD_DAYS
    if holding_days <= 0:
        return {"error": "holding_days must be positive"}

    signals = [
        s
        for s in _load_signals_from_db(db_path)
        if s["score"] >= min_score and s["cluster_size"] >= min_cluster_size
    ]
    if not signals:
        logger.warning("No signals found matching criteria")
        return {}

    results = []
    # Unrounded values, so the aggregate statistics are not distorted by the
    # 4-decimal rounding applied to the per-signal report.
    raw_returns = []
    raw_alphas = []
    # SPY return per entry date; the holding period is the same for every signal.
    spy_returns: dict[str, float] = {}

    for signal in signals:
        ticker = signal["ticker"]
        entry_date_str = signal["trigger_date"]
        try:
            entry_date = datetime.strptime(entry_date_str, "%Y-%m-%d")
        except (TypeError, ValueError):
            # Missing (None) or malformed trigger date: skip the signal.
            logger.debug("Skipping %s: invalid trigger date %r", ticker, entry_date_str)
            continue
        exit_date = entry_date + timedelta(days=holding_days)

        try:
            stock_return = _holding_period_return(yf, ticker, entry_date, exit_date)
        except Exception as e:  # best effort: one bad ticker must not abort the run
            logger.debug("Failed to get data for %s: %s", ticker, e)
            continue
        if stock_return is None:
            logger.debug("No complete price window for %s from %s", ticker, entry_date_str)
            continue

        if entry_date_str not in spy_returns:
            try:
                benchmark = _holding_period_return(yf, "SPY", entry_date, exit_date)
            except Exception as e:
                logger.warning("Failed to get SPY data for %s: %s", entry_date_str, e)
                benchmark = None
            if benchmark is None:
                # Keep the signal but make the missing benchmark visible:
                # alpha for this date equals the raw stock return.
                logger.warning("No SPY benchmark for %s; using 0.0", entry_date_str)
                benchmark = 0.0
            spy_returns[entry_date_str] = benchmark
        spy_return = spy_returns[entry_date_str]

        alpha = stock_return - spy_return
        raw_returns.append(stock_return)
        raw_alphas.append(alpha)
        results.append({
            "ticker": ticker,
            "entry_date": entry_date_str,
            "stock_return": round(stock_return, 4),
            "spy_return": round(spy_return, 4),
            "alpha": round(alpha, 4),
            "cluster_size": signal["cluster_size"],
            "score": signal["score"],
        })

    if not results:
        return {"error": "No results computed"}

    count = len(raw_returns)
    win_rate = sum(1 for r in raw_returns if r > 0) / count
    avg_return = sum(raw_returns) / count
    avg_alpha = sum(raw_alphas) / count
    # Population standard deviation of the per-signal returns.
    std_dev = math.sqrt(sum((r - avg_return) ** 2 for r in raw_returns) / count)
    # NOTE(review): annualisation assumes 252 periods per year although
    # holding_days is applied as calendar days above - confirm the intent.
    sharpe = (avg_return / std_dev * math.sqrt(252 / holding_days)) if std_dev > 0 else 0.0

    return {
        "total_signals": len(results),
        "win_rate": round(win_rate, 4),
        "avg_return": round(avg_return, 4),
        "avg_alpha_vs_spy": round(avg_alpha, 4),
        "sharpe_ratio": round(sharpe, 4),
        "holding_days": holding_days,
        "results": results,
    }
def print_summary(summary: dict) -> None:
    """Print a human-readable report of a backtest summary to stdout.

    Args:
        summary: A summary dict carrying the keys ``holding_days``,
            ``total_signals``, ``win_rate``, ``avg_return``,
            ``avg_alpha_vs_spy`` and ``sharpe_ratio``; or a dict with an
            ``error`` key; or an empty dict.
    """
    if not summary:
        # An empty summary carries none of the keys read below, so report it
        # instead of failing with a KeyError.
        print("No signals matched the backtest criteria")
        return
    if "error" in summary:
        print(f"Error: {summary['error']}")
        return

    rule = "=" * 40
    print(f"\n{rule}")
    print(f"Backtest Results ({summary['holding_days']}-day hold)")
    print(f"{rule}")
    print(f"Total signals: {summary['total_signals']}")
    print(f"Win rate: {summary['win_rate']:.1%}")
    print(f"Avg return: {summary['avg_return']:.2%}")
    print(f"Avg alpha vs SPY: {summary['avg_alpha_vs_spy']:.2%}")
    print(f"Sharpe ratio: {summary['sharpe_ratio']:.2f}")
    print(f"{rule}\n")