Bithumb/deepcoin/ground_truth/hybrid_dd_calibrate.py

"""
Hybrid DD tier 임계값 train 그리드 → holdout 검증 (Option C 2차).
"""

from __future__ import annotations

import json
from itertools import product
from pathlib import Path
from typing import Any

import pandas as pd

from config import GT_INITIAL_CASH_KRW, MATCH_HOLDOUT_RATIO, TRADING_FEE_RATE
from deepcoin.ground_truth.causal_gt_hybrid import build_monitor_hybrid_sized_trades
from deepcoin.ground_truth.gt_allocation import simulate_portfolio_steps
from deepcoin.matching.option_c_phase2 import walk_forward_portfolio_by_month
from deepcoin.matching.portfolio_sim import sort_fires_chronological
from deepcoin.matching.simulation import portfolio_holdout_from_steps
from deepcoin.paths import MATCHING_HYBRID_DD_CALIBRATION_JSON


def default_dd_grid() -> dict[str, list[float]]:
    """Return the default search grid for the large/medium DD tier thresholds.

    Returns:
        A new dict on every call, mapping ``dd_large_pct`` and
        ``dd_medium_pct`` to their candidate values.
    """
    large_candidates = [5.0, 6.0, 8.0, 10.0, 12.0]
    medium_candidates = [2.0, 3.0, 4.0, 6.0]
    return dict(dd_large_pct=large_candidates, dd_medium_pct=medium_candidates)


def load_hybrid_dd_params(path: Path | None = None) -> dict[str, float]:
    """
    Load the hybrid DD tier thresholds, preferring a saved calibration.

    The calibration JSON is read for a ``best_params`` object holding
    ``dd_large_pct`` and, optionally, ``dd_medium_pct``. The config defaults
    are returned when the file is absent or carries no usable
    ``best_params.dd_large_pct``; a missing or ``null`` ``dd_medium_pct``
    falls back to its config default on its own.

    Args:
        path: Calibration JSON path. Defaults to
            ``MATCHING_HYBRID_DD_CALIBRATION_JSON``.

    Returns:
        ``{"dd_large_pct": float, "dd_medium_pct": float}``.

    Raises:
        json.JSONDecodeError: If the file exists but is not valid JSON.
    """
    from config import CAUSAL_GT_DD_LARGE_PCT, CAUSAL_GT_DD_MEDIUM_PCT

    p = path or MATCHING_HYBRID_DD_CALIBRATION_JSON
    if p.is_file():
        data = json.loads(p.read_text(encoding="utf-8"))
        # A payload that is not a JSON object (or whose best_params is not
        # one) carries no usable calibration; treat it like a missing entry
        # instead of failing on ``.get``.
        best = data.get("best_params") if isinstance(data, dict) else None
        if not isinstance(best, dict):
            best = {}

        large = best.get("dd_large_pct")
        if large is not None:
            medium = best.get("dd_medium_pct")
            # ``dict.get(key, default)`` only covers a missing key; an
            # explicit JSON null must fall back to the config value as well.
            if medium is None:
                medium = CAUSAL_GT_DD_MEDIUM_PCT
            return {
                "dd_large_pct": float(large),
                "dd_medium_pct": float(medium),
            }

    return {
        "dd_large_pct": float(CAUSAL_GT_DD_LARGE_PCT),
        "dd_medium_pct": float(CAUSAL_GT_DD_MEDIUM_PCT),
    }


def calibrate_hybrid_dd_thresholds(
    fires: pd.DataFrame,
    ohlc_df: pd.DataFrame,
    *,
    holdout_start: pd.Timestamp,
    grid: dict[str, list[float]] | None = None,
    last_price: float | None = None,
) -> dict[str, Any]:
    """
    Grid-search the hybrid DD tier thresholds and rank them by train PnL.

    For every (dd_large, dd_medium) pair with ``dd_medium < dd_large`` the
    fires are sized, the portfolio is simulated, and train / holdout / full
    period metrics are recorded. The pair with the highest train PnL wins;
    holdout PnL is only the tie-breaker.

    Args:
        fires: All monitor fires.
        ohlc_df: OHLC frame passed through to the trade builder.
        holdout_start: First timestamp of the holdout period; steps strictly
            before it count as train.
        grid: Candidate values keyed by ``dd_large_pct`` / ``dd_medium_pct``.
            Keys that are not supplied fall back to ``default_dd_grid()``.
        last_price: Mark price forwarded to the full-period summary.

    Returns:
        A dict with ``best_params``, ``best_metrics``, ``grid_size``,
        ``top5`` and ``holdout_start``. When no pair survives the
        ``dd_medium < dd_large`` filter, a dict with ``best_params`` (from
        ``load_hybrid_dd_params()``) and ``note`` is returned instead.
    """
    from deepcoin.ground_truth.gt_allocation import simulate_portfolio_summary

    # Merge over the defaults so a caller may tune just one of the two tiers.
    grid = {**default_dd_grid(), **(grid or {})}
    chron = sort_fires_chronological(fires)
    results: list[dict[str, Any]] = []

    for dd_large, dd_medium in product(
        grid["dd_large_pct"],
        grid["dd_medium_pct"],
    ):
        # The medium tier must sit strictly below the large tier.
        if dd_medium >= dd_large:
            continue

        sized, stats = build_monitor_hybrid_sized_trades(
            chron,
            ohlc_df,
            enhanced=False,
            dd_large_pct=dd_large,
            dd_medium_pct=dd_medium,
        )
        steps = simulate_portfolio_steps(sized, use_amount_krw=True)

        # Train PnL: initial cash -> last total asset recorded before the
        # holdout starts. With no pre-holdout step the asset stays at the
        # initial cash, i.e. a PnL of 0.
        pre_holdout_assets = [
            float(s["total_asset_krw"])
            for s in steps
            if pd.to_datetime(s["dt"]) < holdout_start
        ]
        train_asset_end = (
            pre_holdout_assets[-1] if pre_holdout_assets else GT_INITIAL_CASH_KRW
        )
        train_pnl = (
            (train_asset_end - GT_INITIAL_CASH_KRW) / GT_INITIAL_CASH_KRW * 100
        )

        holdout = portfolio_holdout_from_steps(
            steps,
            holdout_start,
            note="holdout",
        )
        full = simulate_portfolio_summary(
            sized,
            last_price=last_price,
            use_amount_krw=True,
        )
        wf = walk_forward_portfolio_by_month(steps)
        pos_months = sum(1 for w in wf if float(w.get("pnl_pct") or 0) > 0)

        # ``or 0`` also covers a metric that is present but None, matching
        # the walk-forward handling above.
        results.append(
            {
                "dd_large_pct": dd_large,
                "dd_medium_pct": dd_medium,
                "train_pnl_pct": round(train_pnl, 2),
                "holdout_pnl_pct": float(holdout.get("pnl_pct") or 0),
                "full_pnl_pct": float(full.get("pnl_pct") or 0),
                "max_drawdown_pct": float(full.get("max_drawdown_pct") or 0),
                "wf_positive_months": pos_months,
                "wf_months": len(wf),
                # Plain int keeps the record JSON-serializable even if the
                # helper reports the count as a NumPy integer.
                "large_tier_buys": int(stats.get("large_tier_buy_count", 0) or 0),
            }
        )

    if not results:
        return {"best_params": load_hybrid_dd_params(), "note": "empty grid"}

    # Primary key: train PnL (already rounded to 2 dp); secondary: holdout PnL.
    # NOTE(review): the tie-break lets holdout data influence the selection —
    # confirm this is intended.
    ranked = sorted(
        results,
        key=lambda x: (x["train_pnl_pct"], x["holdout_pnl_pct"]),
        reverse=True,
    )
    best = ranked[0]
    return {
        "best_params": {
            "dd_large_pct": best["dd_large_pct"],
            "dd_medium_pct": best["dd_medium_pct"],
        },
        "best_metrics": best,
        "grid_size": len(results),
        "top5": ranked[:5],
        "holdout_start": str(holdout_start),
    }


def run_and_save_calibration(
    fires: pd.DataFrame,
    ohlc_df: pd.DataFrame,
    *,
    outcomes: pd.DataFrame,
    last_price: float | None = None,
    out_path: Path | None = None,
) -> dict[str, Any]:
    """
    Run the DD threshold calibration and persist the result as JSON.

    The holdout boundary is the ``1 - MATCH_HOLDOUT_RATIO`` quantile of the
    ``dt`` timestamps in ``outcomes``.

    Args:
        fires: Monitor fires.
        ohlc_df: OHLC frame.
        outcomes: Fire outcomes; only its ``dt`` column is read, to derive
            the holdout split.
        last_price: Mark price forwarded to the calibration.
        out_path: Destination file. Defaults to
            ``MATCHING_HYBRID_DD_CALIBRATION_JSON``.

    Returns:
        The dict returned by ``calibrate_hybrid_dd_thresholds``.
    """
    # Only the timestamp column is needed, so parse it directly instead of
    # copying the whole frame.
    outcome_times = pd.to_datetime(outcomes["dt"])
    holdout_start = outcome_times.quantile(1.0 - MATCH_HOLDOUT_RATIO)

    result = calibrate_hybrid_dd_thresholds(
        fires,
        ohlc_df,
        holdout_start=holdout_start,
        last_price=last_price,
    )

    p = out_path or MATCHING_HYBRID_DD_CALIBRATION_JSON
    p.parent.mkdir(parents=True, exist_ok=True)
    payload = json.dumps(result, ensure_ascii=False, indent=2)

    # Write to a sibling temp file and rename it over the target, so a
    # reader never observes a half-written calibration file.
    tmp = p.with_name(f"{p.name}.tmp")
    tmp.write_text(payload, encoding="utf-8")
    tmp.replace(p)
    return result