# Bithumb/scripts/2_run_causal_sim.py

#!/usr/bin/env python3
"""2단계: 인과 기법 sim + 차트 (1단계와 동일 거래 기간·초기 자본)."""

from __future__ import annotations

import argparse
import logging
import sys
import time
from pathlib import Path

ROOT = Path(__file__).resolve().parents[1]
SRC = ROOT / "src"
if str(SRC) not in sys.path:
    sys.path.insert(0, str(SRC))

from bithumb.config import load_settings
from bithumb.data.candle_loader import load_candles
from bithumb.data.intervals import interval_label
from bithumb.evaluation.causal_sim import (
    best_technique_chart_path,
    build_causal_sim_report,
    pick_best_technique_row,
    render_best_technique_comparison_chart,
    render_causal_sim_html,
    render_technique_sim_chart,
    run_technique_causal_sim,
    save_causal_sim_report,
    technique_sim_chart_path,
)
from bithumb.techniques.runner import load_ground_truth, load_technique_results


def _configure_logging(verbose: bool) -> None:
    """Configure the root logger for this script.

    Args:
        verbose: When true the level is DEBUG; otherwise INFO.
    """
    if verbose:
        threshold = logging.DEBUG
    else:
        threshold = logging.INFO
    log_options = {
        "level": threshold,
        "format": "%(asctime)s [%(levelname)s] %(message)s",
        "datefmt": "%Y-%m-%d %H:%M:%S",
    }
    logging.basicConfig(**log_options)


def _print_progress(phase: str, current: int, total: int, detail: str) -> None:
    """Report one progress step on stdout and through the root logger.

    Args:
        phase: Label shown in square brackets at the start of the line.
        current: Number of items finished so far.
        total: Total item count; a falsy total is reported as 100%.
        detail: Free-form text appended after the dash.
    """
    if total:
        percent = current / total * 100.0
    else:
        # Nothing to divide by: report the step as fully done.
        percent = 100.0
    line = f"[{phase}] {current}/{total} ({percent:.1f}%) — {detail}"
    print(line, flush=True)
    logging.info(line)


def _parse_args() -> argparse.Namespace:
    """Parse the stage-2 command-line options from ``sys.argv``."""
    parser = argparse.ArgumentParser(description="2단계: 인과 기법 sim + 차트")
    parser.add_argument(
        "--techniques",
        type=str,
        default=None,
        help="대상 기법 ID (쉼표 구분). 기본: 전체",
    )
    parser.add_argument(
        "--no-charts",
        action="store_true",
        help="개별 sim 차트 HTML 생략 (요약 리포트만)",
    )
    parser.add_argument("-v", "--verbose", action="store_true")
    return parser.parse_args()


def _print_summary(report, stage1_sim, json_path, html_path, elapsed_sec) -> None:
    """Print the closing summary: elapsed time, stage-1 benchmark, top 3, paths.

    Args:
        report: Report dict; its ``"ranking"`` list is read for the top rows.
        stage1_sim: The ground truth's ``sim_pnl`` dict, or a falsy value when
            the ground-truth JSON has none (the benchmark line is then skipped).
        json_path: Location of the saved summary JSON.
        html_path: Location of the rendered summary HTML.
        elapsed_sec: Seconds elapsed since the simulation loop started.
    """
    print(f"\n=== 2단계 인과 sim 완료 ({elapsed_sec/60:.1f}분) ===", flush=True)
    if stage1_sim:
        print(
            f"1단계 벤치마크(v3): {stage1_sim.get('total_return_pct', 0):+.2f}% "
            f"({stage1_sim.get('period_from', '')} ~ {stage1_sim.get('period_to', '')})",
            flush=True,
        )
    top = report["ranking"][:3]
    for i, row in enumerate(top, start=1):
        print(
            f"  {i}. {row['technique_name']}: {row['sim_return_pct']:+.2f}% "
            f"(GT정합 {row['gt_align_score']*100:.1f})",
            flush=True,
        )
    print(f"요약 JSON: {json_path}", flush=True)
    print(f"요약 HTML: {html_path}", flush=True)


def main() -> int:
    """Run the stage-2 causal simulation CLI.

    Loads settings, the ground-truth JSON and the stored technique results,
    runs ``run_technique_causal_sim`` for every technique (rendering one chart
    per technique unless ``--no-charts`` is given), writes the JSON/HTML
    summary report, renders a comparison chart for the row chosen by
    ``pick_best_technique_row``, and prints a short summary.

    Returns:
        0 on success; 1 when no technique results or no candle rows are
        available.
    """
    args = _parse_args()

    _configure_logging(args.verbose)
    settings = load_settings()
    gt_result = load_ground_truth(settings.ground_truth_file)
    gt_meta = gt_result.get("meta", {})

    # ``None`` means "no filter"; a comma-separated value is split and trimmed.
    technique_ids = None
    if args.techniques:
        technique_ids = [t.strip() for t in args.techniques.split(",") if t.strip()]

    results = load_technique_results(settings.techniques_dir, technique_ids)
    if not results:
        logging.error(
            "기법 결과 없음: %s — 먼저 2_run_techniques.py 실행",
            settings.techniques_dir,
        )
        return 1

    stage1_sim = gt_result.get("sim_pnl")
    if not stage1_sim:
        logging.warning("GT JSON에 1단계 sim_pnl 없음 — 1_ground_truth_sim.py 권장")

    df = load_candles(
        db_path=settings.db_path,
        symbol=settings.symbol,
        interval_min=settings.gt_interval_min,
        lookback_days=settings.gt_lookback_days,
    )
    if df is None or len(df) == 0:
        # Without this guard the ``iloc[-1]`` lookup below fails with a bare
        # traceback; report it the same way as missing technique results.
        logging.error(
            "캔들 데이터 없음: %s (%s) — DB 및 조회 기간 확인",
            settings.db_path,
            settings.symbol,
        )
        return 1

    # The most recent close is handed to every simulation as the mark price.
    last_close = float(df["close"].iloc[-1])
    data_end = gt_meta.get("data_to")
    analysis_dir = settings.causal_sim_report_json.parent

    print("\n=== 2단계 인과 sim ===", flush=True)
    print(
        f"거래 기간: 최근 {settings.gt_sim_lookback_days}일 | "
        f"초기 {settings.gt_initial_cash_krw:,.0f}원 | "
        f"기법 {len(results)}개",
        flush=True,
    )

    sim_pnls: dict[str, dict] = {}
    total = len(results)
    # The timer covers the simulation loop, reporting and chart rendering only.
    t0 = time.monotonic()

    for idx, result in enumerate(results, start=1):
        sim_pnl = run_technique_causal_sim(
            result,
            initial_cash_krw=settings.gt_initial_cash_krw,
            fee_rate=settings.gt_trading_fee_rate,
            sim_lookback_days=settings.gt_sim_lookback_days,
            data_end=data_end,
            last_mark_price=last_close,
        )
        sim_pnls[result.technique_id] = sim_pnl
        _print_progress(
            "sim",
            idx,
            total,
            f"{result.technique_id} → {sim_pnl.get('total_return_pct', 0):+.2f}%",
        )

        if args.no_charts:
            continue

        chart_path = technique_sim_chart_path(analysis_dir, result.technique_id)
        render_technique_sim_chart(
            db_path=settings.db_path,
            symbol=settings.symbol,
            gt_meta=gt_meta,
            result=result,
            sim_pnl=sim_pnl,
            output_path=chart_path,
            chart_lookback_days=settings.gt_sim_lookback_days,
        )
        _print_progress("chart", idx, total, str(chart_path.name))

    report = build_causal_sim_report(results, gt_result, settings.symbol, sim_pnls)
    # NOTE(review): this replaces any "meta" entry the report builder may have
    # produced — confirm that is intended.
    report["meta"] = {
        "interval_label": interval_label(settings.gt_interval_min),
        "initial_cash_krw": settings.gt_initial_cash_krw,
        "sim_lookback_days": settings.gt_sim_lookback_days,
    }
    json_path = save_causal_sim_report(report, settings.causal_sim_report_json)
    html_path = render_causal_sim_html(report, settings.causal_sim_report_html)

    best_row = pick_best_technique_row(report)
    if best_row:
        # Map the chosen report row back to its in-memory technique result.
        best_result = next(
            (r for r in results if r.technique_id == best_row["technique_id"]),
            None,
        )
        if best_result is not None:
            best_chart = best_technique_chart_path(analysis_dir)
            best_pnl = sim_pnls[best_result.technique_id]
            render_best_technique_comparison_chart(
                db_path=settings.db_path,
                symbol=settings.symbol,
                gt_result=gt_result,
                result=best_result,
                sim_pnl=best_pnl,
                output_path=best_chart,
                chart_lookback_days=settings.download_days,
            )
            print(
                f"1단계 v3 대조 차트: {best_chart} "
                f"({best_result.technique_name}, "
                f"{best_pnl.get('total_return_pct', 0):+.2f}%)",
                flush=True,
            )

    elapsed = time.monotonic() - t0
    _print_summary(report, stage1_sim, json_path, html_path, elapsed)
    return 0


if __name__ == "__main__":
    raise SystemExit(main())