인과적 GT 신호·복리 배분 시뮬을 도입하고 운영 정합성을 맞춘다.

미래 데이터를 쓰지 않는 causal 신호/tier와 전 기간 복리 포트폴리오 비교로 GT 대비 sim_sized 검증 경로를 정리하고, 일 한도·매수 상한·live_buy 스케일을 제거한다.

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-05-31 19:50:54 +09:00
parent 5842cc9fa3
commit e68bb44083
16 changed files with 1817 additions and 474 deletions
--- a/deepcoin/matching/simulation.py
+++ b/deepcoin/matching/simulation.py
@@ -13,8 +13,6 @@ import pandas as pd

 from config import (
    GT_INITIAL_CASH_KRW,
-    LIVE_DAILY_KRW_MAX,
-    LIVE_MAX_TRADES_PER_DAY,
    LIVE_ORDER_KRW,
    LIVE_SLIPPAGE_PCT,
    MATCH_HOLDOUT_RATIO,
@@ -32,13 +30,19 @@ from config import (
 from deepcoin.ground_truth.ground_truth import (
    load_ground_truth,
    order_trades_chronological,
-    simulate_truth_portfolio,
+)
+from deepcoin.ground_truth.gt_allocation import simulate_portfolio_summary
+from deepcoin.ground_truth.gt_model import (
+    default_model,
+    model_to_dict,
+    summarize_leg_weights,
+    weight_policy_summary,
 )
 from deepcoin.matching.portfolio_sim import (
    fires_to_trade_list,
-    select_capped_fires,
    simulate_fixed_order_portfolio,
    simulate_sized_portfolio,
+    sort_fires_chronological,
 )
 from deepcoin.matching.select_rules import _rule_metrics, _split_train_valid_holdout
 from deepcoin.paths import resolve_ground_truth_file
@@ -129,7 +133,7 @@ def simulate_live_order_cap(
    holdout_only: bool = True,
 ) -> dict[str, Any]:
    """
-    1회·일 한도·슬리피지 가정으로 체결 가능한 발화만 집계.
+    GT 복리 배분·슬리피지 가정으로 체결 가능한 발화 집계 (일·금액 한도 없음).

    Args:
        outcomes: fire_outcomes (split 컬럼 있으면 holdout 필터 가능).
@@ -142,71 +146,23 @@ def simulate_live_order_cap(
    if outcomes.empty:
        return {"rules": {}, "note": "발화 없음"}

-    df = outcomes
+    df = outcomes.copy()
    if holdout_only and "split" in df.columns:
        df = df[df["split"] == "holdout"]
    if rule_ids is not None:
        df = df[df["rule_id"].isin(rule_ids)]
-    df = df.sort_values("dt").copy()
-    df["ts"] = pd.to_datetime(df["dt"])
-    df["day"] = df["ts"].dt.date.astype(str)
    slip = LIVE_SLIPPAGE_PCT
-    taken_rows: list[pd.DataFrame] = []

-    from deepcoin.matching.position_sizing import (
-        compute_buy_amount_krw,
-        live_buy_asset_pct_scale,
-        load_sizing_context_from_gt,
-    )
+    trades = fires_to_trade_list(sort_fires_chronological(df), apply_dynamic_sizing=True)
+    executed_dts = {
+        t["dt"]
+        for t in trades
+        if t.get("action") == "sell" or float(t.get("amount_krw") or 0) > 0
+    }
+    if not executed_dts:
+        return {"rules": {}, "taken_count": 0, "total_count": int(len(df))}

-    gt_trades, large_legs, approved = load_sizing_context_from_gt()
-    cash = float(GT_INITIAL_CASH_KRW)
-    qty = 0.0
-
-    for day, day_grp in df.groupby("day", sort=True):
-        spent = 0.0
-        n_trades = 0
-        taken_idx: list[int] = []
-        for idx, row in day_grp.iterrows():
-            if n_trades >= LIVE_MAX_TRADES_PER_DAY:
-                break
-            side = row["side"]
-            price = float(row["close"])
-            if side == "buy":
-                scale = live_buy_asset_pct_scale(
-                    str(row["rule_id"]),
-                    str(row["dt"]),
-                    gt_trades,
-                    approved_rules=approved,
-                    large_legs=large_legs,
-                )
-                planned = compute_buy_amount_krw(
-                    cash, qty, price, 1.0, 1.0, asset_pct_scale=scale
-                )
-            else:
-                planned = float(LIVE_ORDER_KRW)
-            if side == "buy":
-                if planned <= 0:
-                    continue
-                if spent + planned > LIVE_DAILY_KRW_MAX:
-                    break
-                fee = planned * TRADING_FEE_RATE
-                cash -= planned + fee
-                qty += planned / price if price > 0 else 0.0
-                spent += planned
-            elif side == "sell" and qty > 0:
-                gross = qty * price
-                cash += gross * (1.0 - TRADING_FEE_RATE)
-                qty = 0.0
-            n_trades += 1
-            taken_idx.append(idx)
-        if taken_idx:
-            taken_rows.append(day_grp.loc[taken_idx])
-
-    if not taken_rows:
-        return {"rules": {}, "taken_count": 0}
-
-    taken = pd.concat(taken_rows, ignore_index=True)
+    taken = df[df["dt"].astype(str).isin(executed_dts)].copy()
    taken["adj_ret_pct"] = taken["forward_ret_pct"] - slip

    by_rule: dict[str, Any] = {}
@@ -221,10 +177,8 @@ def simulate_live_order_cap(

    return {
        "assumptions": {
-            "order_krw": LIVE_ORDER_KRW,
-            "daily_krw_max": LIVE_DAILY_KRW_MAX,
            "slippage_pct": slip,
-            "sizing": "total_asset_pct_ev_wf_large_leg",
+            "sizing": "gt_model_compound_no_daily_cap",
        },
        "taken_count": int(len(taken)),
        "total_count": int(len(df)),
@@ -338,45 +292,126 @@ def build_simulation_report(
    gt_data = load_ground_truth(resolve_ground_truth_file()) or {}
    gt_trades = gt_data.get("trades") or []
    mark = (gt_data.get("summary") or {}).get("mark_price")
-    if gt_trades:
-        portfolio_compare["ground_truth_chrono"] = simulate_truth_portfolio(
-            order_trades_chronological(gt_trades),
+    gt_chrono = order_trades_chronological(gt_trades) if gt_trades else []
+
+    from deepcoin.ground_truth.gt_signal_rules import gt_signal_rule_ids
+    from config import GT_SIGNAL_CAUSAL, SIM_CAUSAL_TIER
+    from deepcoin.matching.position_sizing import load_gt_allocation_analysis
+
+    gt_alloc_analysis = load_gt_allocation_analysis(gt_trades) if gt_trades else {}
+
+    if gt_chrono:
+        if not any(float(t.get("amount_krw") or 0) > 0 for t in gt_chrono):
+            from deepcoin.ground_truth.ground_truth import allocate_gt_order_amounts
+
+            allocate_gt_order_amounts(gt_chrono)
+        portfolio_compare["ground_truth_chrono"] = simulate_portfolio_summary(
+            gt_chrono,
            last_price=float(mark) if mark else None,
+            use_amount_krw=True,
        )
-    holdout = outcomes[
-        outcomes["rule_id"].isin(monitor_ids) & (outcomes["split"] == "holdout")
-    ]
-    capped = select_capped_fires(holdout)
-    if not capped.empty:
+
+    # 전기간 monitor 규칙 — 100만원에서 복리 (holdout만 X)
+    all_monitor = outcomes[outcomes["rule_id"].isin(monitor_ids)]
+    if not all_monitor.empty:
+        sim_trades_full = fires_to_trade_list(sort_fires_chronological(all_monitor))
        portfolio_compare["sim_sized"] = simulate_sized_portfolio(
-            fires_to_trade_list(capped, apply_dynamic_sizing=True),
+            sim_trades_full,
            last_price=float(mark) if mark else None,
        )
        portfolio_compare["sim_fixed_order"] = simulate_fixed_order_portfolio(
-            fires_to_trade_list(capped, apply_dynamic_sizing=False),
+            fires_to_trade_list(all_monitor, apply_dynamic_sizing=False),
            last_price=float(mark) if mark else None,
        )

+    # GT 모델 일반화 규칙 (ZigZag+BB 매수 / ZigZag 고점 매도)
+    gt_buy_rule = "gt_model_buy_zigzag_bb"
+    gt_sell_rule = "gt_model_sell_zigzag_peak"
+    gt_pair_ids = {gt_buy_rule, gt_sell_rule}
+    if gt_pair_ids.issubset(set(outcomes["rule_id"].unique())):
+        gt_pair_fires = outcomes[outcomes["rule_id"].isin(gt_pair_ids)]
+        gt_pair_trades = fires_to_trade_list(sort_fires_chronological(gt_pair_fires))
+        portfolio_compare["sim_gt_model"] = simulate_sized_portfolio(
+            gt_pair_trades,
+            last_price=float(mark) if mark else None,
+        )
+
+    holdout = outcomes[
+        outcomes["rule_id"].isin(monitor_ids) & (outcomes["split"] == "holdout")
+    ]
+    if not holdout.empty and not all_monitor.empty:
+        full_trades = fires_to_trade_list(sort_fires_chronological(all_monitor))
+        if full_trades:
+            from deepcoin.ground_truth.gt_allocation import simulate_portfolio_steps
+
+            steps = simulate_portfolio_steps(full_trades, use_amount_krw=True)
+            if steps:
+                outcomes_ts = outcomes.copy()
+                outcomes_ts["ts"] = pd.to_datetime(outcomes_ts["dt"])
+                h0 = outcomes_ts["ts"].quantile(1.0 - MATCH_HOLDOUT_RATIO)
+                assets = [(s["dt"], float(s["total_asset_krw"])) for s in steps]
+                pre = [a for d, a in assets if pd.to_datetime(d) < h0]
+                in_h = [a for d, a in assets if pd.to_datetime(d) >= h0]
+                asset_start = pre[-1] if pre else float(GT_INITIAL_CASH_KRW)
+                asset_end = in_h[-1] if in_h else assets[-1][1]
+                ho_pnl_pct = (
+                    (asset_end - asset_start) / asset_start * 100.0
+                    if asset_start > 0
+                    else 0.0
+                )
+                portfolio_compare["sim_sized_holdout"] = {
+                    "initial_asset_krw": round(asset_start, 0),
+                    "final_asset_krw": round(asset_end, 0),
+                    "pnl_krw": round(asset_end - asset_start, 0),
+                    "pnl_pct": round(ho_pnl_pct, 2),
+                    "note": "전기간 복리 후 holdout 구간 자산 증감 (1M 재시작 아님)",
+                    "trade_count": int(len(holdout)),
+                }
+
+    if portfolio_compare.get("sim_sized") and portfolio_compare.get("ground_truth_chrono"):
+        gt_pnl = float(portfolio_compare["ground_truth_chrono"].get("pnl_pct", 0))
+        sim_pnl = float(portfolio_compare["sim_sized"].get("pnl_pct", 0))
+        portfolio_compare["gt_capture_ratio"] = round(
+            sim_pnl / gt_pnl if abs(gt_pnl) > 1e-6 else 0.0,
+            4,
+        )
+        portfolio_compare["gt_pnl_pct"] = gt_pnl
+        portfolio_compare["sim_sized_pnl_pct"] = sim_pnl
+        if portfolio_compare.get("sim_gt_model"):
+            gtp = float(portfolio_compare["sim_gt_model"].get("pnl_pct", 0))
+            portfolio_compare["gt_model_capture_ratio"] = round(
+                gtp / gt_pnl if abs(gt_pnl) > 1e-6 else 0.0,
+                4,
+            )
+
+    portfolio_compare["gt_allocation_analysis"] = gt_alloc_analysis
+    portfolio_compare["causal_mode"] = {
+        "gt_signal_causal": GT_SIGNAL_CAUSAL,
+        "sim_causal_tier": SIM_CAUSAL_TIER,
+        "note": "인과적: t 시점까지 데이터만 사용 (운영 정합)",
+    }
+
    gt_portfolio: dict[str, Any] = {}
    if ANALYSIS_GT_CALIBRATION_JSON.is_file():
        cal = json.loads(ANALYSIS_GT_CALIBRATION_JSON.read_text(encoding="utf-8"))
        gt_portfolio = cal.get("final", {})
    else:
-        from deepcoin.ground_truth.ground_truth import load_ground_truth
        from deepcoin.matching.gt_asset_calibration import (
            portfolio_asset_ratio,
        )

-        gt_data = load_ground_truth(resolve_ground_truth_file()) or {}
-        trades = gt_data.get("trades") or []
-        mark = (gt_data.get("summary") or {}).get("mark_price")
+        gt_data_cal = load_ground_truth(resolve_ground_truth_file()) or {}
+        trades = gt_data_cal.get("trades") or []
+        mark_cal = (gt_data_cal.get("summary") or {}).get("mark_price")
        if trades:
            gt_portfolio = {
-                "portfolio": portfolio_asset_ratio(trades, set(), mark),
+                "portfolio": portfolio_asset_ratio(trades, set(), mark_cal),
                "note": "캘리브레이션 미실행 — scripts/04_calibrate_gt_assets.py",
            }

    summaries = matched.get("all_rule_summaries") or matched.get("monitor_rules") or []
+    leg_weight_check = summarize_leg_weights(gt_trades) if gt_trades else {}
+    invalid_legs = [lid for lid, info in leg_weight_check.items() if not info.get("valid", True)]
    return {
        "label_mode": matched.get("label_mode"),
        "train_ratio": MATCH_TRAIN_RATIO,
@@ -389,7 +424,13 @@ def build_simulation_report(
        "live_order_cap_sim": live_cap,
        "go_no_go": go,
        "portfolio_compare": portfolio_compare,
-        "gt_model": gt_data.get("model"),
+        "gt_model": gt_data.get("model") or model_to_dict(default_model()),
+        "gt_weight_policy": weight_policy_summary(default_model()),
+        "gt_leg_weight_validation": {
+            "legs": leg_weight_check,
+            "invalid_leg_ids": invalid_legs,
+            "all_valid": len(invalid_legs) == 0,
+        },
        "monitor_rules": matched.get("monitor_rules", []),
        "gt_portfolio_calibration": gt_portfolio,
        "criteria": {
@@ -450,6 +491,24 @@ def run_simulation_report(
        )
    cal = report.get("gt_portfolio_calibration") or {}
    port = cal.get("portfolio") or {}
+    pc = report.get("portfolio_compare") or {}
+    if pc.get("gt_capture_ratio") is not None:
+        print(
+            f"[시뮬] GT 대비 sim_sized(전기간 복리): {pc.get('sim_sized_pnl_pct')}% "
+            f"/ GT {pc.get('gt_pnl_pct')}% "
+            f"(capture={pc.get('gt_capture_ratio'):.2%})"
+        )
+    if pc.get("gt_model_capture_ratio") is not None:
+        print(
+            f"[시뮬] GT 대비 sim_gt_model: "
+            f"{pc.get('sim_gt_model', {}).get('pnl_pct')}% "
+            f"(capture={pc.get('gt_model_capture_ratio'):.2%})"
+        )
+    if pc.get("sim_sized", {}).get("max_drawdown_pct") is not None:
+        print(
+            f"[시뮬] sim_sized MDD: {pc['sim_sized']['max_drawdown_pct']}% "
+            f"(GT MDD: {pc.get('ground_truth_chrono', {}).get('max_drawdown_pct')}%)"
+        )
    if port.get("asset_ratio") is not None:
        met = cal.get("targets_met", port.get("target_met_90"))
        print(