GT MTF 프로필·캘리브레이션과 04 매칭/시뮬/실거래 파이프라인을 추가한다.

3분~일봉 GT 타점 분석(03c), leg 체결 순서 수정, 총자산 90% 검증 루프, walk-forward Go/No-Go 시뮬, monitor·live_trader 및 reference 문서를 포함한다. Co-authored-by: Cursor <cursoragent@cursor.com>
2026-05-31 11:27:50 +09:00
parent b52d61b777
commit 2cb67c42b3
47 changed files with 5956 additions and 209 deletions
--- a/deepcoin/matching/simulation.py
+++ b/deepcoin/matching/simulation.py
@@ -0,0 +1,371 @@
+"""
+1단계: walk-forward·민감도·실거래 한도 가정 시뮬·Go/No-Go 리포트.
+"""
+
+from __future__ import annotations
+
+import json
+from pathlib import Path
+from typing import Any
+
+import numpy as np
+import pandas as pd
+
+from config import (
+    LIVE_DAILY_KRW_MAX,
+    LIVE_MAX_TRADES_PER_DAY,
+    LIVE_ORDER_KRW,
+    LIVE_SLIPPAGE_PCT,
+    MATCH_HOLDOUT_RATIO,
+    MATCH_MIN_EV_VALID,
+    MATCH_MIN_FIRES_HOLDOUT,
+    MATCH_MIN_PROFIT_FACTOR,
+    MATCH_TRAIN_RATIO,
+    SIM_FEE_STRESS_MULT,
+    SIM_GO_MIN_HOLDOUT_EV,
+    SIM_GO_MIN_HOLDOUT_PF,
+    SIM_GO_WF_POSITIVE_RATIO,
+    SIM_WALK_FORWARD_MIN_MONTHS,
+    TRADING_FEE_RATE,
+)
+from deepcoin.matching.select_rules import _rule_metrics, _split_train_valid_holdout
+from deepcoin.paths import (
+    ANALYSIS_GT_CALIBRATION_JSON,
+    MATCHING_FIRE_OUTCOMES,
+    MATCHING_MATCHED_RULES,
+    MATCHING_SIMULATION_HTML,
+    MATCHING_SIMULATION_JSON,
+    resolve_ground_truth_file,
+)
+
+
+def _fee_adjust_ret(series: pd.Series, mult: float) -> pd.Series:
+    """
+    Apply a fee stress to a series of forward returns.
+
+    The round-trip fee (``TRADING_FEE_RATE`` charged twice), expressed in
+    percentage points, is deducted an additional ``mult - 1`` times.
+
+    Args:
+        series: forward_ret_pct values.
+        mult: Fee multiplier (2.0 means the fee is doubled).
+
+    Returns:
+        The adjusted returns, in percent.
+    """
+    round_trip_fee_pct = TRADING_FEE_RATE * 2 * 100
+    surcharge = round_trip_fee_pct * (mult - 1.0)
+    return series - surcharge
+
+
+def walk_forward_by_month(outcomes: pd.DataFrame) -> list[dict[str, Any]]:
+    """
+    Aggregate rule metrics per (rule, calendar month).
+
+    Args:
+        outcomes: fire_outcomes rows; the ``dt``, ``rule_id`` and ``side``
+            columns are read here, the rest is handed to ``_rule_metrics``.
+
+    Returns:
+        One dict per (rule_id, month) group holding ``rule_id``, ``side``
+        (taken from the first row of the group), ``month`` and every key
+        returned by ``_rule_metrics`` for that group.
+    """
+    if outcomes.empty:
+        return []
+
+    monthly = outcomes.copy()
+    monthly["ts"] = pd.to_datetime(monthly["dt"])
+    monthly["month"] = monthly["ts"].dt.to_period("M").astype(str)
+
+    def _month_row(rule_id: Any, month_label: Any, bucket: pd.DataFrame) -> dict[str, Any]:
+        # Metrics are computed first, then merged after the identifying keys.
+        metrics = _rule_metrics(bucket)
+        row: dict[str, Any] = {
+            "rule_id": rule_id,
+            "side": bucket["side"].iloc[0],
+            "month": month_label,
+        }
+        row.update(metrics)
+        return row
+
+    return [
+        _month_row(rule_id, month_label, bucket)
+        for (rule_id, month_label), bucket in monthly.groupby(["rule_id", "month"])
+    ]
+
+
+def walk_forward_summary(wf_rows: list[dict[str, Any]]) -> dict[str, Any]:
+    """
+    Summarise, per rule, how many months had a positive EV.
+
+    Args:
+        wf_rows: Output of ``walk_forward_by_month``; each row needs the
+            ``rule_id`` and ``ev_pct`` keys.
+
+    Returns:
+        Mapping rule_id -> ``{"months", "positive_months", "positive_ratio",
+        "mean_ev_pct"}``; ratios and means are rounded to 4 decimals.
+        Empty dict when ``wf_rows`` is empty.
+    """
+    if not wf_rows:
+        return {}
+
+    frame = pd.DataFrame(wf_rows)
+    summary: dict[str, Any] = {}
+    for rule_id, ev_series in frame.groupby("rule_id")["ev_pct"]:
+        month_count = len(ev_series)
+        positive = int(ev_series.gt(0).sum())
+        if month_count:
+            ratio = round(positive / month_count, 4)
+        else:
+            ratio = 0.0
+        summary[rule_id] = {
+            "months": month_count,
+            "positive_months": positive,
+            "positive_ratio": ratio,
+            "mean_ev_pct": round(float(ev_series.mean()), 4),
+        }
+    return summary
+
+
+def simulate_live_order_cap(outcomes: pd.DataFrame) -> dict[str, Any]:
+    """
+    Simulate which fires could be filled under the live order limits.
+
+    Fires are processed in chronological order. Within each calendar day a
+    fire is taken while both the trade-count cap (``LIVE_MAX_TRADES_PER_DAY``)
+    and the daily budget (``LIVE_DAILY_KRW_MAX``, spending ``LIVE_ORDER_KRW``
+    per fire) allow it; once either limit is hit the rest of that day is
+    skipped. ``LIVE_SLIPPAGE_PCT`` is subtracted from the forward return of
+    every taken fire before metrics are computed.
+
+    Args:
+        outcomes: fire_outcomes rows; the ``dt``, ``rule_id`` and
+            ``forward_ret_pct`` columns are read here.
+
+    Returns:
+        Dict with ``assumptions``, ``taken_count``, ``total_count``, per-rule
+        ``rules`` (taken/total counts and ``_rule_metrics`` on the
+        slippage-adjusted returns) and ``portfolio_adj_ev_pct``. When there is
+        nothing to report, ``rules`` is empty and ``taken_count`` /
+        ``total_count`` are still present.
+    """
+    if outcomes.empty:
+        return {"rules": {}, "note": "발화 없음", "taken_count": 0, "total_count": 0}
+
+    df = outcomes.copy()
+    df["ts"] = pd.to_datetime(df["dt"])
+    # Sort on the parsed timestamp with a stable algorithm: several rules can
+    # fire on the same timestamp, and which of them survives the daily cap
+    # must not depend on an unstable sort. Ties keep their input order.
+    df = df.sort_values("ts", kind="mergesort").reset_index(drop=True)
+    df["day"] = df["ts"].dt.date.astype(str)
+    slip = LIVE_SLIPPAGE_PCT
+    taken_parts: list[pd.DataFrame] = []
+
+    for _, day_grp in df.groupby("day", sort=True):
+        spent = 0.0
+        n_trades = 0
+        for _ in range(len(day_grp)):
+            if n_trades >= LIVE_MAX_TRADES_PER_DAY:
+                break
+            if spent + LIVE_ORDER_KRW > LIVE_DAILY_KRW_MAX:
+                break
+            spent += LIVE_ORDER_KRW
+            n_trades += 1
+        if n_trades:
+            # The taken fires are always a prefix of the day, so slice by
+            # position; this cannot duplicate rows the way label lookup can
+            # when the caller's index is not unique.
+            taken_parts.append(day_grp.iloc[:n_trades])
+
+    if not taken_parts:
+        return {"rules": {}, "taken_count": 0, "total_count": int(len(df))}
+
+    taken = pd.concat(taken_parts, ignore_index=True)
+    taken["adj_ret_pct"] = taken["forward_ret_pct"] - slip
+
+    by_rule: dict[str, Any] = {}
+    for rid, grp in taken.groupby("rule_id"):
+        # Feed the slippage-adjusted return to _rule_metrics under the column
+        # name it is given elsewhere in this module.
+        adjusted = grp.assign(forward_ret_pct=grp["adj_ret_pct"])
+        by_rule[rid] = {
+            "taken_count": int(len(grp)),
+            "total_count": int((df["rule_id"] == rid).sum()),
+            "metrics": _rule_metrics(adjusted),
+        }
+
+    return {
+        "assumptions": {
+            "order_krw": LIVE_ORDER_KRW,
+            "daily_krw_max": LIVE_DAILY_KRW_MAX,
+            "slippage_pct": slip,
+        },
+        "taken_count": int(len(taken)),
+        "total_count": int(len(df)),
+        "rules": by_rule,
+        "portfolio_adj_ev_pct": round(float(taken["adj_ret_pct"].mean()), 4),
+    }
+
+
+def _coerce_metric(value: Any, default: float) -> float:
+    """Return ``value`` as a float, or ``default`` when it is missing or not numeric."""
+    if value is None:
+        return default
+    try:
+        return float(value)
+    except (TypeError, ValueError):
+        return default
+
+
+def evaluate_go_no_go(
+    matched: dict[str, Any],
+    wf_summary: dict[str, Any],
+    fee_stress: dict[str, Any],
+    live_cap: dict[str, Any],
+) -> dict[str, Any]:
+    """
+    Decide Go/No-Go from holdout, walk-forward and fee-stress checks.
+
+    A rule passes only when all three hold:
+      * holdout EV >= ``SIM_GO_MIN_HOLDOUT_EV`` and holdout profit factor
+        >= ``SIM_GO_MIN_HOLDOUT_PF``;
+      * it has at least ``SIM_WALK_FORWARD_MIN_MONTHS`` months and its share
+        of positive months is >= ``SIM_GO_WF_POSITIVE_RATIO``;
+      * its fee-stressed EV is >= ``SIM_GO_MIN_HOLDOUT_EV``.
+    Missing or null metrics fall back to failing defaults (-999 for EVs,
+    0 for the rest) instead of raising.
+
+    Args:
+        matched: Contents of matched_rules.json; rules are read from
+            ``monitor_rules``, falling back to ``selected``.
+        wf_summary: Output of ``walk_forward_summary``.
+        fee_stress: Per-rule metrics computed on fee-stressed returns.
+        live_cap: Output of ``simulate_live_order_cap``.
+
+    Returns:
+        Dict with ``go`` (True only if there is at least one rule and every
+        rule passes), ``checks`` (one entry per rule) and
+        ``live_cap_taken_ratio``.
+    """
+    rules = matched.get("monitor_rules") or matched.get("selected") or []
+    checks: list[dict[str, Any]] = []
+    all_go = True
+
+    for rule in rules:
+        rid = rule["rule_id"]
+        h = (rule.get("metrics") or {}).get("holdout") or {}
+        ev_h = _coerce_metric(h.get("ev_pct"), -999.0)
+        pf_h = _coerce_metric(h.get("profit_factor"), 0.0)
+        wf = wf_summary.get(rid) or {}
+        wf_ratio = _coerce_metric(wf.get("positive_ratio"), 0.0)
+        wf_months = int(wf.get("months") or 0)
+        # Coerced like the holdout EV so a null metric fails the check
+        # rather than raising on the comparison below.
+        stress_ev = _coerce_metric((fee_stress.get(rid) or {}).get("ev_pct"), -999.0)
+
+        c_holdout = ev_h >= SIM_GO_MIN_HOLDOUT_EV and pf_h >= SIM_GO_MIN_HOLDOUT_PF
+        c_wf = wf_months >= SIM_WALK_FORWARD_MIN_MONTHS and wf_ratio >= SIM_GO_WF_POSITIVE_RATIO
+        c_fee = stress_ev >= SIM_GO_MIN_HOLDOUT_EV
+        ok = c_holdout and c_wf and c_fee
+        if not ok:
+            all_go = False
+        checks.append(
+            {
+                "rule_id": rid,
+                "side": rule.get("side"),
+                "pass": ok,
+                "holdout_ev": ev_h,
+                "holdout_pf": pf_h,
+                "wf_positive_ratio": wf_ratio,
+                # Reported because the month count is part of the criterion.
+                "wf_months": wf_months,
+                "fee_stress_ev": stress_ev,
+            }
+        )
+
+    taken = live_cap.get("taken_count", 0)
+    total = max(live_cap.get("total_count", 1), 1)
+    return {
+        "go": all_go and len(checks) > 0,
+        "checks": checks,
+        "live_cap_taken_ratio": round(taken / total, 4),
+    }
+
+
+def build_simulation_report(
+    outcomes_path: Path | None = None,
+    matched_path: Path | None = None,
+) -> dict[str, Any]:
+    """
+    Build the simulation report dict.
+
+    Loads fire outcomes (and matched rules when that file exists), then runs
+    the monthly walk-forward aggregation, the per-rule fee stress, the live
+    order-cap simulation and the Go/No-Go evaluation. GT portfolio
+    calibration comes from the calibration JSON's ``final`` entry when that
+    file exists; otherwise it is computed from the ground-truth trades.
+
+    Args:
+        outcomes_path: fire_outcomes.csv; defaults to ``MATCHING_FIRE_OUTCOMES``.
+        matched_path: matched_rules.json; defaults to ``MATCHING_MATCHED_RULES``.
+
+    Returns:
+        The full simulation_report dict.
+
+    Raises:
+        FileNotFoundError: If the fire outcomes file does not exist.
+    """
+    op = outcomes_path or MATCHING_FIRE_OUTCOMES
+    mp = matched_path or MATCHING_MATCHED_RULES
+    if not op.is_file():
+        raise FileNotFoundError(f"fire_outcomes 없음: {op} — 04_match_rules.py 먼저 실행")
+    outcomes = pd.read_csv(op)
+    matched: dict[str, Any] = {}
+    if mp.is_file():
+        matched = json.loads(mp.read_text(encoding="utf-8"))
+
+    # NOTE(review): the split column is attached but not read in this
+    # function; presumably _rule_metrics consumes it — confirm.
+    outcomes["split"] = _split_train_valid_holdout(outcomes)
+    wf_rows = walk_forward_by_month(outcomes)
+    wf_sum = walk_forward_summary(wf_rows)
+
+    # One groupby pass instead of re-filtering the frame per rule;
+    # sort=False keeps rules in order of first appearance. Rows with a
+    # missing rule_id are excluded by groupby.
+    fee_stress: dict[str, Any] = {}
+    for rid, sub in outcomes.groupby("rule_id", sort=False):
+        adj = _fee_adjust_ret(sub["forward_ret_pct"], SIM_FEE_STRESS_MULT)
+        fee_stress[rid] = _rule_metrics(sub.assign(forward_ret_pct=adj))
+
+    live_cap = simulate_live_order_cap(outcomes)
+    go = evaluate_go_no_go(matched, wf_sum, fee_stress, live_cap)
+
+    gt_portfolio: dict[str, Any] = {}
+    if ANALYSIS_GT_CALIBRATION_JSON.is_file():
+        cal = json.loads(ANALYSIS_GT_CALIBRATION_JSON.read_text(encoding="utf-8"))
+        gt_portfolio = cal.get("final", {})
+    else:
+        # Imported lazily: only needed when no calibration result is on disk.
+        from deepcoin.ground_truth.ground_truth import load_ground_truth
+        from deepcoin.matching.gt_asset_calibration import (
+            portfolio_asset_ratio,
+        )
+
+        gt_data = load_ground_truth(resolve_ground_truth_file()) or {}
+        trades = gt_data.get("trades") or []
+        mark = (gt_data.get("summary") or {}).get("mark_price")
+        if trades:
+            gt_portfolio = {
+                # NOTE(review): an empty set is passed as the second
+                # argument — its meaning is defined in gt_asset_calibration.
+                "portfolio": portfolio_asset_ratio(trades, set(), mark),
+                "note": "캘리브레이션 미실행 — scripts/04_calibrate_gt_assets.py",
+            }
+
+    return {
+        "label_mode": matched.get("label_mode"),
+        "train_ratio": MATCH_TRAIN_RATIO,
+        "holdout_ratio": MATCH_HOLDOUT_RATIO,
+        "outcomes_rows": int(len(outcomes)),
+        "walk_forward": wf_rows,
+        "walk_forward_summary": wf_sum,
+        "fee_stress_mult": SIM_FEE_STRESS_MULT,
+        "fee_stress_by_rule": fee_stress,
+        "live_order_cap_sim": live_cap,
+        "go_no_go": go,
+        "monitor_rules": matched.get("monitor_rules", []),
+        "gt_portfolio_calibration": gt_portfolio,
+        "criteria": {
+            "min_holdout_ev": SIM_GO_MIN_HOLDOUT_EV,
+            "min_holdout_pf": SIM_GO_MIN_HOLDOUT_PF,
+            "wf_positive_ratio": SIM_GO_WF_POSITIVE_RATIO,
+            "wf_min_months": SIM_WALK_FORWARD_MIN_MONTHS,
+        },
+    }
+
+
+def write_simulation_html(report: dict[str, Any], out_path: Path) -> Path:
+    """
+    Save simulation_report.html (same style as the ground_truth chart).
+
+    Delegates to ``write_simulation_report_html``; the import is done at
+    call time.
+
+    Args:
+        report: Result of ``build_simulation_report``.
+        out_path: Destination HTML path.
+
+    Returns:
+        out_path.
+    """
+    from deepcoin.matching import simulation_html as _html_writer
+
+    written_path = _html_writer.write_simulation_report_html(report, out_path)
+    return written_path
+
+
+def run_simulation_report(
+    outcomes_path: Path | None = None,
+    matched_path: Path | None = None,
+) -> dict[str, Any]:
+    """
+    Build the simulation report, save it as JSON and HTML, and print a summary.
+
+    Args:
+        outcomes_path: fire_outcomes.csv.
+        matched_path: matched_rules.json.
+
+    Returns:
+        The report dict.
+    """
+    report = build_simulation_report(outcomes_path, matched_path)
+
+    # Persist both artefacts before printing anything.
+    json_target = MATCHING_SIMULATION_JSON
+    json_target.parent.mkdir(parents=True, exist_ok=True)
+    serialized = json.dumps(report, ensure_ascii=False, indent=2)
+    json_target.write_text(serialized, encoding="utf-8")
+    write_simulation_html(report, MATCHING_SIMULATION_HTML)
+
+    verdict = report["go_no_go"]
+    go_flag = verdict["go"]
+    print(f"[시뮬] 저장: {MATCHING_SIMULATION_JSON}")
+    print(f"[시뮬] 저장: {MATCHING_SIMULATION_HTML}")
+    print(f"[시뮬] Go/No-Go: {'GO' if go_flag else 'NO-GO'}")
+
+    # One line per rule check.
+    for check in verdict.get("checks", []):
+        status = "NG"
+        if check["pass"]:
+            status = "OK"
+        print(
+            f"  [{status}] {check['rule_id']}: holdout EV={check['holdout_ev']} "
+            f"WF+={check['wf_positive_ratio']} fee2x EV={check['fee_stress_ev']}"
+        )
+
+    calibration = report.get("gt_portfolio_calibration") or {}
+    portfolio = calibration.get("portfolio") or {}
+    if portfolio.get("asset_ratio") is None:
+        return report
+
+    # Prefer the calibration-level flag, else the portfolio's own 90% flag.
+    target_met = calibration.get("targets_met", portfolio.get("target_met_90"))
+    print(
+        f"[시뮬] GT 총자산 대비 leg subset 비율: {portfolio['asset_ratio']:.2%} "
+        f"({portfolio.get('legs_covered')}/{portfolio.get('legs_total')} leg) "
+        f"목표90%={'달성' if target_met else '미달'}"
+    )
+    return report