169 lines
5.7 KiB
Python
169 lines
5.7 KiB
Python
import logging
|
|
import math
|
|
from datetime import datetime, timedelta
|
|
|
|
import config
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
def _load_signals_from_db(db_path: str, min_score: float, min_cluster_size: int) -> list[dict]:
    """Load the signals that meet the score and cluster-size thresholds.

    Every row of ``signals`` is LEFT JOINed to ``filings`` on ``ticker`` and
    ``filings.transaction_date = signals.trigger_date`` so that each result
    also carries the matching filing's ``role`` (``None`` when no filing
    matches).

    Args:
        db_path: Path of the SQLite database file.
        min_score: Inclusive lower bound on ``signals.score``.
        min_cluster_size: Inclusive lower bound on ``signals.cluster_size``.

    Returns:
        One plain ``dict`` per joined row, keyed by column name.

    Raises:
        sqlite3.Error: If the database cannot be opened or the query fails.
    """
    import sqlite3
    from contextlib import closing

    # NOTE(review): the join has no uniqueness guarantee visible here; if
    # several filings share a signal's ticker and trigger date, that signal
    # is returned once per filing -- confirm this is what callers expect.
    query = """
        SELECT s.*, f.role FROM signals s
        LEFT JOIN filings f ON f.ticker = s.ticker AND f.transaction_date = s.trigger_date
        WHERE s.score >= ? AND s.cluster_size >= ?
    """

    # closing() releases the connection even when execute() raises; the
    # connection's own context manager only manages transactions.
    with closing(sqlite3.connect(db_path)) as conn:
        conn.row_factory = sqlite3.Row
        rows = conn.execute(query, (min_score, min_cluster_size)).fetchall()

    return [dict(row) for row in rows]
|
|
|
|
|
|
def _first_close_on_or_after(price_data, target_date: datetime) -> float:
    """Return the first usable closing price dated on or after ``target_date``.

    Rows are visited in the order ``price_data["Close"].items()`` yields them,
    and only calendar dates are compared (time of day and timezone are
    ignored). Rows whose close is NaN are skipped so that a gap in the price
    data cannot become the entry price and turn every derived return into NaN.

    Args:
        price_data: Mapping-like object whose ``"Close"`` entry yields
            ``(timestamp, close)`` pairs from ``.items()``; each timestamp
            must provide ``to_pydatetime()``. Presumably a yfinance/pandas
            price frame -- verify against the caller.
        target_date: Earliest acceptable date.

    Returns:
        The matching close as a ``float``.

    Raises:
        ValueError: If no row on or after ``target_date`` has a numeric close.
    """
    cutoff = target_date.date()
    for ts, raw_close in price_data["Close"].items():
        if ts.to_pydatetime().date() < cutoff:
            continue
        close = float(raw_close)
        if math.isnan(close):
            # Missing quote for this session; try the next one instead.
            continue
        return close
    raise ValueError(f"No price data on or after {target_date.date()}")
|
|
|
|
|
|
def _first_close_before(price_data, target_date: datetime) -> float:
    """Return the last usable closing price dated on or before ``target_date``.

    Despite the name, this is the *latest* qualifying close: every row yielded
    by ``price_data["Close"].items()`` is scanned and the last one whose
    calendar date is not after ``target_date`` wins. Rows whose close is NaN
    are ignored so that a gap on the final session does not replace a valid
    earlier close.

    Args:
        price_data: Mapping-like object whose ``"Close"`` entry yields
            ``(timestamp, close)`` pairs from ``.items()``; each timestamp
            must provide ``to_pydatetime()``. Presumably a yfinance/pandas
            price frame -- verify against the caller.
        target_date: Latest acceptable date.

    Returns:
        The matching close as a ``float``.

    Raises:
        ValueError: If no row on or before ``target_date`` has a numeric close.
    """
    cutoff = target_date.date()
    latest = None
    # The whole series is scanned (no early break) so the result does not
    # depend on the rows being sorted by date.
    for ts, raw_close in price_data["Close"].items():
        if ts.to_pydatetime().date() > cutoff:
            continue
        close = float(raw_close)
        if not math.isnan(close):
            latest = close
    if latest is None:
        raise ValueError(f"No price data on or before {target_date.date()}")
    return latest
|
|
|
|
|
|
def _flatten_price_columns(frame):
    """Collapse two-level price columns down to their first (field) level."""
    columns = frame.columns
    if getattr(columns, "nlevels", 1) > 1:
        # Newer yfinance releases label single-ticker downloads with
        # (field, ticker) columns; with those, frame["Close"] is a DataFrame
        # rather than a Series and the close-lookup helpers cannot iterate it.
        frame = frame.copy()
        frame.columns = columns.get_level_values(0)
    return frame


def _download_period_return(yf, symbol: str, entry_date: datetime, exit_date: datetime):
    """Download closes for ``symbol`` and return its entry-to-exit fractional return.

    Returns ``None`` when the download comes back empty; lookup or arithmetic
    errors propagate to the caller.
    """
    frame = yf.download(
        symbol,
        start=entry_date.strftime("%Y-%m-%d"),
        # Fetch five days beyond the exit date; the exit lookup itself never
        # reads past exit_date.
        end=(exit_date + timedelta(days=5)).strftime("%Y-%m-%d"),
        progress=False,
        auto_adjust=True,
    )
    if frame.empty:
        return None
    frame = _flatten_price_columns(frame)
    entry_price = _first_close_on_or_after(frame, entry_date)
    exit_price = _first_close_before(frame, exit_date)
    return (exit_price - entry_price) / entry_price


def run_backtest(
    db_path: str = None,
    holding_days: int = None,
    min_score: float = 0.0,
    min_cluster_size: int = 1,
) -> dict:
    """Backtest stored signals against SPY over a fixed holding period.

    For each qualifying signal the position is entered at the first close on
    or after ``trigger_date`` and exited at the last close on or before
    ``trigger_date + holding_days`` calendar days. The same window is priced
    for SPY and the difference is reported as alpha.

    Args:
        db_path: SQLite database path; falls back to ``config.DB_PATH`` when
            falsy.
        holding_days: Holding period in calendar days; falls back to
            ``config.HOLDING_PERIOD_DAYS`` when falsy (including ``0``).
        min_score: Inclusive minimum signal score.
        min_cluster_size: Inclusive minimum cluster size.

    Returns:
        ``{}`` when no signal matches the filters, ``{"error": ...}`` when no
        signal could be priced, otherwise a dict with ``total_signals``,
        ``win_rate``, ``avg_return``, ``avg_alpha_vs_spy``, ``sharpe_ratio``,
        ``holding_days`` and the per-signal ``results`` list.

    Raises:
        ImportError: If yfinance is not installed.
    """
    try:
        import yfinance as yf
    except ImportError as exc:
        raise ImportError("yfinance not installed. Run: pip install yfinance") from exc

    db_path = db_path or config.DB_PATH
    holding_days = holding_days or config.HOLDING_PERIOD_DAYS

    signals = _load_signals_from_db(db_path, min_score, min_cluster_size)
    if not signals:
        logger.warning("No signals found matching criteria")
        return {}

    results = []
    # Benchmark return per (entry date, holding period) so SPY is downloaded
    # once per distinct window rather than once per signal.
    spy_cache: dict[tuple, float] = {}

    for signal in signals:
        ticker = signal["ticker"]
        entry_date_str = signal["trigger_date"]

        try:
            entry_date = datetime.strptime(entry_date_str, "%Y-%m-%d")
        except (TypeError, ValueError):
            # TypeError covers a NULL/non-string trigger_date, which would
            # otherwise abort the whole run.
            logger.debug("Skipping %s: unparseable trigger_date %r", ticker, entry_date_str)
            continue

        exit_date = entry_date + timedelta(days=holding_days)

        # Best-effort per signal: any download or pricing failure skips it.
        try:
            stock_return = _download_period_return(yf, ticker, entry_date, exit_date)
        except Exception as e:
            logger.debug("Failed to get data for %s: %s", ticker, e)
            continue
        if stock_return is None:
            logger.debug("No price data for %s", ticker)
            continue
        if not math.isfinite(stock_return):
            # A NaN/inf return would silently poison every aggregate below.
            logger.debug("Skipping %s: non-finite return %r", ticker, stock_return)
            continue

        period_key = (entry_date_str, holding_days)
        if period_key not in spy_cache:
            try:
                benchmark = _download_period_return(yf, "SPY", entry_date, exit_date)
            except Exception as e:
                logger.warning("Failed to get SPY data for %s: %s", entry_date_str, e)
                benchmark = None
            else:
                if benchmark is None or not math.isfinite(benchmark):
                    logger.warning("No usable SPY data for %s", entry_date_str)
                    benchmark = None
            # Fall back to a flat benchmark (alpha == raw return) as before,
            # but now with a warning in the log instead of silently.
            spy_cache[period_key] = 0.0 if benchmark is None else benchmark

        spy_return = spy_cache[period_key]
        alpha = stock_return - spy_return

        results.append({
            "ticker": ticker,
            "entry_date": entry_date_str,
            "stock_return": round(stock_return, 4),
            "spy_return": round(spy_return, 4),
            "alpha": round(alpha, 4),
            "cluster_size": signal["cluster_size"],
            "score": signal["score"],
        })

    if not results:
        return {"error": "No results computed"}

    # Aggregates are computed from the rounded per-signal figures so they
    # agree with what is reported in "results".
    returns = [r["stock_return"] for r in results]
    alphas = [r["alpha"] for r in results]
    win_rate = sum(1 for r in returns if r > 0) / len(returns)
    avg_return = sum(returns) / len(returns)
    avg_alpha = sum(alphas) / len(alphas)
    # Population standard deviation (divides by N, not N - 1).
    std_dev = math.sqrt(sum((r - avg_return) ** 2 for r in returns) / len(returns))
    # NOTE(review): 252 looks like trading days per year, while holding_days
    # is applied above as calendar days -- confirm the intended annualization.
    sharpe = (avg_return / std_dev * math.sqrt(252 / holding_days)) if std_dev > 0 else 0.0

    return {
        "total_signals": len(results),
        "win_rate": round(win_rate, 4),
        "avg_return": round(avg_return, 4),
        "avg_alpha_vs_spy": round(avg_alpha, 4),
        "sharpe_ratio": round(sharpe, 4),
        "holding_days": holding_days,
        "results": results,
    }
|
|
|
|
|
|
def print_summary(summary: dict):
    """Print a backtest summary to stdout.

    Args:
        summary: A result dict as produced by ``run_backtest``. An empty
            (or ``None``) summary -- what ``run_backtest`` returns when no
            signal matches -- prints a short notice instead of raising
            ``KeyError``. A dict containing ``"error"`` prints that message.
            Otherwise it must contain ``holding_days``, ``total_signals``,
            ``win_rate``, ``avg_return``, ``avg_alpha_vs_spy`` and
            ``sharpe_ratio``.
    """
    if not summary:
        print("No backtest results to display")
        return
    if "error" in summary:
        print(f"Error: {summary['error']}")
        return

    width = 40
    rule = "=" * width
    print(f"\n{rule}")
    print(f"Backtest Results ({summary['holding_days']}-day hold)")
    print(rule)
    print(f"Total signals: {summary['total_signals']}")
    print(f"Win rate: {summary['win_rate']:.1%}")
    print(f"Avg return: {summary['avg_return']:.2%}")
    print(f"Avg alpha vs SPY: {summary['avg_alpha_vs_spy']:.2%}")
    print(f"Sharpe ratio: {summary['sharpe_ratio']:.2f}")
    print(f"{rule}\n")
|