""" Hybrid DD tier 임계값 train 그리드 → holdout 검증 (Option C 2차). """ from __future__ import annotations import json from itertools import product from pathlib import Path from typing import Any import pandas as pd from config import GT_INITIAL_CASH_KRW, MATCH_HOLDOUT_RATIO, TRADING_FEE_RATE from deepcoin.ground_truth.causal_gt_hybrid import build_monitor_hybrid_sized_trades from deepcoin.ground_truth.gt_allocation import simulate_portfolio_steps from deepcoin.matching.option_c_phase2 import walk_forward_portfolio_by_month from deepcoin.matching.portfolio_sim import sort_fires_chronological from deepcoin.matching.simulation import portfolio_holdout_from_steps from deepcoin.paths import MATCHING_HYBRID_DD_CALIBRATION_JSON def default_dd_grid() -> dict[str, list[float]]: """DD large/medium 탐색 그리드.""" return { "dd_large_pct": [5.0, 6.0, 8.0, 10.0, 12.0], "dd_medium_pct": [2.0, 3.0, 4.0, 6.0], } def load_hybrid_dd_params(path: Path | None = None) -> dict[str, float]: """ 캘리브레이션 JSON 또는 config 기본값. Args: path: JSON 경로. Returns: {dd_large_pct, dd_medium_pct}. """ from config import CAUSAL_GT_DD_LARGE_PCT, CAUSAL_GT_DD_MEDIUM_PCT p = path or MATCHING_HYBRID_DD_CALIBRATION_JSON if p.is_file(): data = json.loads(p.read_text(encoding="utf-8")) best = data.get("best_params") or {} if best.get("dd_large_pct") is not None: return { "dd_large_pct": float(best["dd_large_pct"]), "dd_medium_pct": float( best.get("dd_medium_pct", CAUSAL_GT_DD_MEDIUM_PCT) ), } return { "dd_large_pct": float(CAUSAL_GT_DD_LARGE_PCT), "dd_medium_pct": float(CAUSAL_GT_DD_MEDIUM_PCT), } def calibrate_hybrid_dd_thresholds( fires: pd.DataFrame, ohlc_df: pd.DataFrame, *, holdout_start: pd.Timestamp, grid: dict[str, list[float]] | None = None, last_price: float | None = None, ) -> dict[str, Any]: """ train 구간 PnL 최대 → holdout PnL로 검증, 최적 DD 임계 저장. Args: fires: monitor 전체 발화. ohlc_df: 3m OHLC. holdout_start: holdout 시작 시각. grid: dd_large/medium 후보. last_price: 미청산 평가가. Returns: best_params, train/holdout metrics, grid top-N. """ from deepcoin.ground_truth.gt_allocation import simulate_portfolio_summary grid = grid or default_dd_grid() chron = sort_fires_chronological(fires) results: list[dict[str, Any]] = [] for dd_large, dd_medium in product( grid["dd_large_pct"], grid["dd_medium_pct"], ): if dd_medium >= dd_large: continue sized, stats = build_monitor_hybrid_sized_trades( chron, ohlc_df, enhanced=False, dd_large_pct=dd_large, dd_medium_pct=dd_medium, ) steps = simulate_portfolio_steps(sized, use_amount_krw=True) train = portfolio_holdout_from_steps( [s for s in steps if pd.to_datetime(s["dt"]) < holdout_start], holdout_start, initial_if_empty=GT_INITIAL_CASH_KRW, note="train", ) # train-only: start 1M → last asset before holdout if steps: pre = [ float(s["total_asset_krw"]) for s in steps if pd.to_datetime(s["dt"]) < holdout_start ] train_asset_end = pre[-1] if pre else GT_INITIAL_CASH_KRW train_pnl = (train_asset_end - GT_INITIAL_CASH_KRW) / GT_INITIAL_CASH_KRW * 100 else: train_pnl = 0.0 holdout = portfolio_holdout_from_steps( steps, holdout_start, note="holdout", ) full = simulate_portfolio_summary( sized, last_price=last_price, use_amount_krw=True, ) wf = walk_forward_portfolio_by_month(steps) pos_months = sum(1 for w in wf if float(w.get("pnl_pct") or 0) > 0) results.append( { "dd_large_pct": dd_large, "dd_medium_pct": dd_medium, "train_pnl_pct": round(train_pnl, 2), "holdout_pnl_pct": float(holdout.get("pnl_pct", 0)), "full_pnl_pct": float(full.get("pnl_pct", 0)), "max_drawdown_pct": float(full.get("max_drawdown_pct", 0)), "wf_positive_months": pos_months, "wf_months": len(wf), "large_tier_buys": stats.get("large_tier_buy_count", 0), } ) if not results: return {"best_params": load_hybrid_dd_params(), "note": "empty grid"} # train PnL 1순위, holdout PnL 2순위 ranked = sorted( results, key=lambda x: (x["train_pnl_pct"], x["holdout_pnl_pct"]), reverse=True, ) best = ranked[0] return { "best_params": { "dd_large_pct": best["dd_large_pct"], "dd_medium_pct": best["dd_medium_pct"], }, "best_metrics": best, "grid_size": len(results), "top5": ranked[:5], "holdout_start": str(holdout_start), } def run_and_save_calibration( fires: pd.DataFrame, ohlc_df: pd.DataFrame, *, outcomes: pd.DataFrame, last_price: float | None = None, out_path: Path | None = None, ) -> dict[str, Any]: """ 캘리브레이션 실행 후 JSON 저장. Args: fires: monitor 발화. ohlc_df: OHLC. outcomes: fire_outcomes (holdout split). last_price: 평가 종가. out_path: 저장 경로. Returns: calibrate_hybrid_dd_thresholds 결과. """ outcomes_ts = outcomes.copy() outcomes_ts["ts"] = pd.to_datetime(outcomes_ts["dt"]) holdout_start = outcomes_ts["ts"].quantile(1.0 - MATCH_HOLDOUT_RATIO) result = calibrate_hybrid_dd_thresholds( fires, ohlc_df, holdout_start=holdout_start, last_price=last_price, ) p = out_path or MATCHING_HYBRID_DD_CALIBRATION_JSON p.parent.mkdir(parents=True, exist_ok=True) p.write_text(json.dumps(result, ensure_ascii=False, indent=2), encoding="utf-8") return result