인과적 GT 신호·복리 배분 시뮬을 도입하고 운영 정합성을 맞춘다.

미래 데이터를 쓰지 않는 causal 신호/tier와 전 기간 복리 포트폴리오 비교로 GT 대비 sim_sized 검증 경로를 정리하고, 일한도·매수 상한·live_buy 스케일을 제거한다.

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-05-31 19:50:54 +09:00
parent 5842cc9fa3
commit e68bb44083
16 changed files with 1817 additions and 474 deletions
--- a/deepcoin/matching/position_sizing.py
+++ b/deepcoin/matching/position_sizing.py
@@ -1,5 +1,5 @@
 """
-총자산 대비 최적 매수율(비중) · 현금 한도 · leg 상위·EV/WF 통과 대형 매수.
+총자산 대비 GT 모델 매수율(비중) · 보유 현금 한도 · leg tier 배분.
 """

 from __future__ import annotations
@@ -15,19 +15,14 @@ from config import (
    GT_INITIAL_CASH_KRW,
    GT_LARGE_LEG_TOP_PCT,
    GT_MIN_ORDER_KRW,
-    LIVE_BUY_PCT_LARGE,
-    LIVE_BUY_PCT_SMALL,
    MATCH_GT_TOLERANCE_MIN,
-    SIM_FEE_STRESS_MULT,
-    SIM_GO_MIN_HOLDOUT_EV,
-    SIM_GO_MIN_HOLDOUT_PF,
-    SIM_GO_WF_POSITIVE_RATIO,
-    SIM_WALK_FORWARD_MIN_MONTHS,
    TRADING_FEE_RATE,
 )
 from deepcoin.matching.load_rules import load_matched_rules
 from deepcoin.paths import MATCHING_FIRE_OUTCOMES, MATCHING_MATCHED_RULES

+_GT_ALLOC_ANALYSIS_CACHE: dict[str, Any] | None = None
+

 def portfolio_totals(
    cash: float,
@@ -78,10 +73,12 @@ def compute_buy_amount_krw(
    fee_rate: float = TRADING_FEE_RATE,
 ) -> float:
    """
-    총자산 × (최적 매수율 × scale)을 목표로, 가용 현금을 넘지 않게 매수 원화를 산출합니다.
+    목표=총보유자산×(최적 매수율×scale), 체결=min(목표, 보유현금/(1+fee)) 로 매수 원화를 산출합니다.
+
+    보유 현금 = 총보유자산 − 코인평가액(cash 인자).

    Args:
-        cash: 현금.
+        cash: 보유 현금(가용 원화).
        qty: 보유 수량.
        price: 체결가.
        weight: 타점 비중.
@@ -95,8 +92,8 @@ def compute_buy_amount_krw(
    """
    if price <= 0:
        return 0.0
-    total_asset, _, _ = portfolio_totals(cash, qty, price)
-    budget = max(cash / (1.0 + fee_rate), 0.0)
+    total_asset, _, available_cash = portfolio_totals(cash, qty, price)
+    budget = max(available_cash / (1.0 + fee_rate), 0.0)
    opt_rate = optimal_weight_share(weight, weight_sum_remaining) * asset_pct_scale
    target = total_asset * opt_rate
    amount = min(target, budget)
@@ -105,6 +102,27 @@ def compute_buy_amount_krw(
    return round(max(amount, 0.0), 0)


+def large_leg_ids_from_past_returns(
+    leg_returns: dict[int, float],
+    top_pct: float = GT_LARGE_LEG_TOP_PCT,
+) -> set[int]:
+    """
+    이미 청산된 leg의 realized return 상위 n% (인과적 tier).
+
+    Args:
+        leg_returns: leg_id → realized return %.
+        top_pct: 상위 비율.
+
+    Returns:
+        large leg id set.
+    """
+    if not leg_returns:
+        return set()
+    ranked = sorted(leg_returns.items(), key=lambda x: x[1], reverse=True)
+    n = max(1, int(len(ranked) * top_pct + 0.999999))
+    return {lid for lid, _ in ranked[:n]}
+
+
 def top_leg_ids_by_forward_return(
    trades: list[dict[str, Any]],
    top_pct: float = GT_LARGE_LEG_TOP_PCT,
@@ -215,6 +233,8 @@ def load_ev_wf_approved_rule_ids(
    if _APPROVED_RULES_CACHE is not None:
        return set(_APPROVED_RULES_CACHE)

+    from config import SIM_FEE_STRESS_MULT
+
    from deepcoin.matching.select_rules import _rule_metrics, _split_train_valid_holdout
    from deepcoin.matching.simulation import (
        evaluate_go_no_go,
@@ -258,6 +278,72 @@ def load_ev_wf_approved_rule_ids(
    return fallback


+def load_gt_allocation_analysis(
+    gt_trades: list[dict[str, Any]] | None = None,
+) -> dict[str, Any]:
+    """
+    GT amount_krw 분석 캐시 (tier 권장 pct).
+
+    Args:
+        gt_trades: GT trades. None이면 파일 로드.
+
+    Returns:
+        analyze_gt_buy_allocation 결과.
+    """
+    global _GT_ALLOC_ANALYSIS_CACHE
+    if _GT_ALLOC_ANALYSIS_CACHE is not None:
+        return _GT_ALLOC_ANALYSIS_CACHE
+    from deepcoin.ground_truth.gt_allocation_analysis import analyze_gt_buy_allocation
+    from deepcoin.paths import resolve_ground_truth_file
+
+    trades = gt_trades
+    if trades is None:
+        p = resolve_ground_truth_file()
+        if p.is_file():
+            trades = json.loads(p.read_text(encoding="utf-8")).get("trades") or []
+    if not trades:
+        _GT_ALLOC_ANALYSIS_CACHE = {}
+        return _GT_ALLOC_ANALYSIS_CACHE
+    chron = sorted(trades, key=lambda x: x["dt"])
+    if not any(float(t.get("amount_krw") or 0) > 0 for t in chron):
+        from deepcoin.ground_truth.ground_truth import allocate_gt_order_amounts
+
+        allocate_gt_order_amounts(chron)
+    _GT_ALLOC_ANALYSIS_CACHE = analyze_gt_buy_allocation(chron)
+    return _GT_ALLOC_ANALYSIS_CACHE
+
+
+def gt_tier_scale_for_trade(
+    trade: dict[str, Any],
+    gt_trades: list[dict[str, Any]],
+    large_legs: set[int],
+    *,
+    analysis: dict[str, Any] | None = None,
+) -> float:
+    """
+    GT leg tier 배분 스케일 (분석 권장값 또는 config).
+
+    시뮬은 live_buy_asset_pct_scale 대신 GT와 동일 tier 정책을 사용합니다.
+
+    Args:
+        trade: {dt, leg_id?, action, ...}.
+        gt_trades: GT trades (leg 매칭).
+        large_legs: 상위 leg.
+        analysis: analyze_gt_buy_allocation 결과.
+
+    Returns:
+        pct_large 또는 pct_small.
+    """
+    from deepcoin.ground_truth.gt_allocation_analysis import gt_tier_scale_from_analysis
+
+    lid = trade.get("leg_id")
+    if lid is None:
+        lid = nearest_gt_leg_id(str(trade["dt"]), gt_trades)
+    if lid is None:
+        return float(GT_BUY_PCT_SMALL_LEG)
+    return gt_tier_scale_from_analysis(int(lid), large_legs, analysis)
+
+
 def live_buy_asset_pct_scale(
    rule_id: str,
    dt: str,
@@ -267,7 +353,7 @@ def live_buy_asset_pct_scale(
    large_legs: set[int],
 ) -> float:
    """
-    실거래·시뮬 매수: EV/WF 통과 규칙 + leg 상위만 대형 비율.
+    실거래 전용 매수 tier (EV/WF·leg 상위). 시뮬은 gt_tier_scale_for_trade 사용.

    Args:
        rule_id: 규칙 ID.
@@ -279,6 +365,8 @@ def live_buy_asset_pct_scale(
    Returns:
        LIVE_BUY_PCT_LARGE 또는 LIVE_BUY_PCT_SMALL(또는 0에 가까운 소형).
    """
+    from config import LIVE_BUY_PCT_LARGE, LIVE_BUY_PCT_SMALL
+
    if rule_id not in approved_rules:
        return float(LIVE_BUY_PCT_SMALL)
    lid = nearest_gt_leg_id(dt, gt_trades)
@@ -287,10 +375,193 @@ def live_buy_asset_pct_scale(
    return float(LIVE_BUY_PCT_SMALL)


+def enrich_sim_trades_with_gt_weights(
+    trades: list[dict[str, Any]],
+    gt_trades: list[dict[str, Any]],
+    *,
+    causal_legs: bool = False,
+) -> list[dict[str, Any]]:
+    """
+    규칙 발화에 GT leg_id·매수/매도 weight를 부여합니다.
+
+    causal_legs=True: GT leg 매칭 없이 매수~매도 구간 순번 leg_id (인과적).
+
+    Args:
+        trades: {dt, action/side, price, rule_id}.
+        gt_trades: GT trades (leg 매칭, causal_legs=False 일 때).
+        causal_legs: 순차 leg_id.
+
+    Returns:
+        leg_id·weight가 채워진 trade dict.
+    """
+    from deepcoin.ground_truth.gt_model import leg_entry_weights, leg_exit_weights
+
+    rows = sorted(trades, key=lambda x: x["dt"])
+    pos = 0
+    seq_leg = 0
+    while pos < len(rows):
+        action = rows[pos].get("action", rows[pos].get("side", ""))
+        if action != "buy":
+            if causal_legs:
+                rows[pos]["leg_id"] = seq_leg
+            elif "leg_id" not in rows[pos]:
+                rows[pos]["leg_id"] = nearest_gt_leg_id(rows[pos]["dt"], gt_trades) or 0
+            rows[pos]["weight"] = float(rows[pos].get("weight", 1.0))
+            pos += 1
+            continue
+        buy_end = pos
+        while buy_end < len(rows):
+            a = rows[buy_end].get("action", rows[buy_end].get("side", ""))
+            if a != "buy":
+                break
+            buy_end += 1
+        buy_slice = rows[pos:buy_end]
+        sell_slice: list[dict[str, Any]] = []
+        sell_end = buy_end
+        while sell_end < len(rows):
+            a = rows[sell_end].get("action", rows[sell_end].get("side", ""))
+            if a == "buy":
+                break
+            if a == "sell":
+                sell_slice.append(rows[sell_end])
+            sell_end += 1
+
+        if causal_legs:
+            leg_id = seq_leg
+        else:
+            leg_id = nearest_gt_leg_id(buy_slice[0]["dt"], gt_trades) or 0
+        prices = [float(t["price"]) for t in buy_slice]
+        buy_weights = leg_entry_weights(prices)
+        for t, w in zip(buy_slice, buy_weights):
+            t["leg_id"] = leg_id
+            t["weight"] = round(w, 4)
+            if "action" not in t and "side" in t:
+                t["action"] = t["side"]
+
+        if sell_slice:
+            sw = leg_exit_weights(len(sell_slice))
+            for t, w in zip(sell_slice, sw):
+                t["leg_id"] = leg_id
+                t["weight"] = round(w, 4)
+                if "action" not in t and "side" in t:
+                    t["action"] = t["side"]
+        if causal_legs and sell_slice:
+            seq_leg += 1
+        pos = sell_end if sell_slice else buy_end
+    return rows
+
+
+def attach_gt_model_amounts(
+    trades: list[dict[str, Any]],
+    *,
+    gt_trades: list[dict[str, Any]] | None = None,
+    approved_rules: set[str] | None = None,
+    large_legs: set[int] | None = None,
+    initial_cash: float = GT_INITIAL_CASH_KRW,
+    fee_rate: float = TRADING_FEE_RATE,
+) -> list[dict[str, Any]]:
+    """
+    GT 모델 비중 + 공통 배분 엔진으로 amount_krw를 채웁니다.
+
+    시뮬·매칭 전용: leg·tier 모두 인과적(과거 청산 leg 수익만). GT 정답 배분은
+    ground_truth.allocate_gt_order_amounts 를 사용하세요.
+
+    Args:
+        trades: enrich_sim_trades_with_gt_weights 출력 또는 raw fires.
+        gt_trades: GT trades. None이면 파일 로드.
+        approved_rules: EV/WF 통과 rule (live scale용).
+        large_legs: 상위 leg.
+        initial_cash: 초기 현금.
+        fee_rate: 수수료율.
+
+    Returns:
+        amount_krw·weight·leg_id가 채워진 trade dict.
+    """
+    from deepcoin.ground_truth.gt_allocation import allocate_order_amounts_chronological
+
+    if gt_trades is None:
+        gt_trades, _, _ = load_sizing_context_from_gt()
+
+    enriched = enrich_sim_trades_with_gt_weights(
+        list(trades),
+        gt_trades,
+        causal_legs=True,
+    )
+
+    allocate_order_amounts_chronological(
+        enriched,
+        initial_cash=initial_cash,
+        fee_rate=fee_rate,
+        large_legs=None,
+        asset_pct_scale_fn=None,
+        causal_tier=True,
+    )
+    return enriched
+
+
+def plan_open_position_buy(
+    open_buys: list[dict[str, Any]],
+    candidate: dict[str, Any],
+    cash: float,
+    qty: float,
+    gt_trades: list[dict[str, Any]] | None = None,
+    *,
+    large_legs: set[int],
+    analysis: dict[str, Any] | None = None,
+    fee_rate: float = TRADING_FEE_RATE,
+) -> float:
+    """
+    미청산 포지션 내 다음 매수 원화 (GT tier·보유 현금 한도, 1회 상한 없음).
+
+    Args:
+        open_buys: 현재 포지션에서 이미 체결된 매수 dict.
+        candidate: 이번 매수 후보 {dt, price, rule_id, leg_id?, ...}.
+        cash: 보유 현금.
+        qty: 보유 수량.
+        gt_trades: GT leg 매칭용.
+        large_legs: 상위 leg.
+        analysis: GT 배분 분석.
+        fee_rate: 수수료율.
+
+    Returns:
+        매수 계획 원화.
+    """
+    from deepcoin.ground_truth.gt_model import leg_entry_weights
+
+    if gt_trades is None:
+        gt_trades, _, _ = load_sizing_context_from_gt()
+    if analysis is None:
+        analysis = load_gt_allocation_analysis(gt_trades)
+
+    prices = [float(t["price"]) for t in open_buys] + [float(candidate["price"])]
+    weights = leg_entry_weights(prices)
+    idx = len(open_buys)
+    w = weights[idx]
+    w_sum = sum(weights[idx:])
+    cand = dict(candidate)
+    if "leg_id" not in cand:
+        cand["leg_id"] = nearest_gt_leg_id(str(candidate["dt"]), gt_trades)
+    scale = gt_tier_scale_for_trade(
+        cand,
+        gt_trades,
+        large_legs,
+        analysis=analysis,
+    )
+    return compute_buy_amount_krw(
+        cash,
+        qty,
+        float(candidate["price"]),
+        w,
+        w_sum,
+        asset_pct_scale=scale,
+        fee_rate=fee_rate,
+    )
+
+
 def attach_dynamic_buy_amounts(
    trades: list[dict[str, Any]],
    *,
-    gt_trades: list[dict[str, Any]],
+    gt_trades: list[dict[str, Any]] | None = None,
    approved_rules: set[str] | None = None,
    large_legs: set[int] | None = None,
    initial_cash: float = GT_INITIAL_CASH_KRW,
@@ -298,60 +569,18 @@ def attach_dynamic_buy_amounts(
    fee_rate: float = TRADING_FEE_RATE,
 ) -> list[dict[str, Any]]:
    """
-    시뮬 발화 trade dict에 amount_krw(총자산 비율·현금 한도)를 채웁니다.
+    시뮬 발화 trade dict에 amount_krw(GT 모델·보유 현금 한도)를 채웁니다.

-    Args:
-        trades: 시간순 {dt, action, price, rule_id, …}.
-        gt_trades: GT leg 매칭용.
-        approved_rules: EV/WF 통과 rule. None이면 전 규칙 대형 허용 안 함.
-        large_legs: 상위 leg. None이면 GT에서 산출.
-        initial_cash: 초기 현금.
-        default_weight: 매수 weight 기본값.
-        fee_rate: 수수료율.
-
-    Returns:
-        amount_krw가 채워진 동일 리스트.
+    attach_gt_model_amounts 별칭.
    """
-    if large_legs is None:
-        large_legs = top_leg_ids_by_forward_return(gt_trades)
-    if approved_rules is None:
-        approved_rules = set()
-
-    cash = float(initial_cash)
-    qty = 0.0
-    for t in sorted(trades, key=lambda x: x["dt"]):
-        action = t.get("action", t.get("side", ""))
-        price = float(t["price"])
-        if price <= 0:
-            continue
-        if action == "buy":
-            rid = str(t.get("rule_id", ""))
-            scale = live_buy_asset_pct_scale(
-                rid, t["dt"], gt_trades,
-                approved_rules=approved_rules,
-                large_legs=large_legs,
-            )
-            amount = compute_buy_amount_krw(
-                cash,
-                qty,
-                price,
-                float(t.get("weight", default_weight)),
-                float(t.get("weight", default_weight)),
-                asset_pct_scale=scale,
-                fee_rate=fee_rate,
-            )
-            t["amount_krw"] = amount
-            if amount > 0:
-                fee = amount * fee_rate
-                cash -= amount + fee
-                qty += amount / price
-        elif action == "sell" and qty > 1e-12:
-            gross = qty * price
-            t["amount_krw"] = round(gross, 0)
-            fee = gross * fee_rate
-            cash += gross - fee
-            qty = 0.0
-    return trades
+    return attach_gt_model_amounts(
+        trades,
+        gt_trades=gt_trades,
+        approved_rules=approved_rules,
+        large_legs=large_legs,
+        initial_cash=initial_cash,
+        fee_rate=fee_rate,
+    )


 def load_sizing_context_from_gt(
@@ -374,5 +603,4 @@ def load_sizing_context_from_gt(
        data = json.loads(p.read_text(encoding="utf-8"))
        trades = data.get("trades") or []
    large = top_leg_ids_by_forward_return(trades)
-    approved = load_ev_wf_approved_rule_ids()
-    return trades, large, approved
+    return trades, large, set()