인과적 GT 신호·복리 배분 시뮬을 도입하고 운영 정합성을 맞춘다.

미래 데이터를 쓰지 않는 causal 신호/tier와 전기간 복리 포트폴리오 비교로 GT 대비 sim_sized 검증 경로를 정리하고, 일 한도·매수 상한·live_buy 스케일을 제거한다.

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
xavis
2026-05-31 19:50:54 +09:00
parent 5842cc9fa3
commit e68bb44083
16 changed files with 1817 additions and 474 deletions

View File

@@ -13,8 +13,6 @@ import pandas as pd
from config import (
GT_INITIAL_CASH_KRW,
LIVE_DAILY_KRW_MAX,
LIVE_MAX_TRADES_PER_DAY,
LIVE_ORDER_KRW,
LIVE_SLIPPAGE_PCT,
MATCH_HOLDOUT_RATIO,
@@ -32,13 +30,19 @@ from config import (
from deepcoin.ground_truth.ground_truth import (
load_ground_truth,
order_trades_chronological,
simulate_truth_portfolio,
)
from deepcoin.ground_truth.gt_allocation import simulate_portfolio_summary
from deepcoin.ground_truth.gt_model import (
default_model,
model_to_dict,
summarize_leg_weights,
weight_policy_summary,
)
from deepcoin.matching.portfolio_sim import (
fires_to_trade_list,
select_capped_fires,
simulate_fixed_order_portfolio,
simulate_sized_portfolio,
sort_fires_chronological,
)
from deepcoin.matching.select_rules import _rule_metrics, _split_train_valid_holdout
from deepcoin.paths import resolve_ground_truth_file
@@ -129,7 +133,7 @@ def simulate_live_order_cap(
holdout_only: bool = True,
) -> dict[str, Any]:
"""
1회·일 한도·슬리피지 가정으로 체결 가능한 발화 집계.
GT 복리 배분·슬리피지 가정으로 체결 가능한 발화 집계 (일·금액 한도 없음).
Args:
outcomes: fire_outcomes (split 컬럼 있으면 holdout 필터 가능).
@@ -142,71 +146,23 @@ def simulate_live_order_cap(
if outcomes.empty:
return {"rules": {}, "note": "발화 없음"}
df = outcomes
df = outcomes.copy()
if holdout_only and "split" in df.columns:
df = df[df["split"] == "holdout"]
if rule_ids is not None:
df = df[df["rule_id"].isin(rule_ids)]
df = df.sort_values("dt").copy()
df["ts"] = pd.to_datetime(df["dt"])
df["day"] = df["ts"].dt.date.astype(str)
slip = LIVE_SLIPPAGE_PCT
taken_rows: list[pd.DataFrame] = []
from deepcoin.matching.position_sizing import (
compute_buy_amount_krw,
live_buy_asset_pct_scale,
load_sizing_context_from_gt,
)
trades = fires_to_trade_list(sort_fires_chronological(df), apply_dynamic_sizing=True)
executed_dts = {
t["dt"]
for t in trades
if t.get("action") == "sell" or float(t.get("amount_krw") or 0) > 0
}
if not executed_dts:
return {"rules": {}, "taken_count": 0, "total_count": int(len(df))}
gt_trades, large_legs, approved = load_sizing_context_from_gt()
cash = float(GT_INITIAL_CASH_KRW)
qty = 0.0
for day, day_grp in df.groupby("day", sort=True):
spent = 0.0
n_trades = 0
taken_idx: list[int] = []
for idx, row in day_grp.iterrows():
if n_trades >= LIVE_MAX_TRADES_PER_DAY:
break
side = row["side"]
price = float(row["close"])
if side == "buy":
scale = live_buy_asset_pct_scale(
str(row["rule_id"]),
str(row["dt"]),
gt_trades,
approved_rules=approved,
large_legs=large_legs,
)
planned = compute_buy_amount_krw(
cash, qty, price, 1.0, 1.0, asset_pct_scale=scale
)
else:
planned = float(LIVE_ORDER_KRW)
if side == "buy":
if planned <= 0:
continue
if spent + planned > LIVE_DAILY_KRW_MAX:
break
fee = planned * TRADING_FEE_RATE
cash -= planned + fee
qty += planned / price if price > 0 else 0.0
spent += planned
elif side == "sell" and qty > 0:
gross = qty * price
cash += gross * (1.0 - TRADING_FEE_RATE)
qty = 0.0
n_trades += 1
taken_idx.append(idx)
if taken_idx:
taken_rows.append(day_grp.loc[taken_idx])
if not taken_rows:
return {"rules": {}, "taken_count": 0}
taken = pd.concat(taken_rows, ignore_index=True)
taken = df[df["dt"].astype(str).isin(executed_dts)].copy()
taken["adj_ret_pct"] = taken["forward_ret_pct"] - slip
by_rule: dict[str, Any] = {}
@@ -221,10 +177,8 @@ def simulate_live_order_cap(
return {
"assumptions": {
"order_krw": LIVE_ORDER_KRW,
"daily_krw_max": LIVE_DAILY_KRW_MAX,
"slippage_pct": slip,
"sizing": "total_asset_pct_ev_wf_large_leg",
"sizing": "gt_model_compound_no_daily_cap",
},
"taken_count": int(len(taken)),
"total_count": int(len(df)),
@@ -338,45 +292,126 @@ def build_simulation_report(
gt_data = load_ground_truth(resolve_ground_truth_file()) or {}
gt_trades = gt_data.get("trades") or []
mark = (gt_data.get("summary") or {}).get("mark_price")
if gt_trades:
portfolio_compare["ground_truth_chrono"] = simulate_truth_portfolio(
order_trades_chronological(gt_trades),
gt_chrono = order_trades_chronological(gt_trades) if gt_trades else []
from deepcoin.ground_truth.gt_signal_rules import gt_signal_rule_ids
from config import GT_SIGNAL_CAUSAL, SIM_CAUSAL_TIER
from deepcoin.matching.position_sizing import load_gt_allocation_analysis
gt_alloc_analysis = load_gt_allocation_analysis(gt_trades) if gt_trades else {}
if gt_chrono:
if not any(float(t.get("amount_krw") or 0) > 0 for t in gt_chrono):
from deepcoin.ground_truth.ground_truth import allocate_gt_order_amounts
allocate_gt_order_amounts(gt_chrono)
portfolio_compare["ground_truth_chrono"] = simulate_portfolio_summary(
gt_chrono,
last_price=float(mark) if mark else None,
use_amount_krw=True,
)
holdout = outcomes[
outcomes["rule_id"].isin(monitor_ids) & (outcomes["split"] == "holdout")
]
capped = select_capped_fires(holdout)
if not capped.empty:
# 전기간 monitor 규칙 — 100만원에서 복리 (holdout만 X)
all_monitor = outcomes[outcomes["rule_id"].isin(monitor_ids)]
if not all_monitor.empty:
sim_trades_full = fires_to_trade_list(sort_fires_chronological(all_monitor))
portfolio_compare["sim_sized"] = simulate_sized_portfolio(
fires_to_trade_list(capped, apply_dynamic_sizing=True),
sim_trades_full,
last_price=float(mark) if mark else None,
)
portfolio_compare["sim_fixed_order"] = simulate_fixed_order_portfolio(
fires_to_trade_list(capped, apply_dynamic_sizing=False),
fires_to_trade_list(all_monitor, apply_dynamic_sizing=False),
last_price=float(mark) if mark else None,
)
# GT 모델 일반화 규칙 (ZigZag+BB 매수 / ZigZag 고점 매도)
gt_buy_rule = "gt_model_buy_zigzag_bb"
gt_sell_rule = "gt_model_sell_zigzag_peak"
gt_pair_ids = {gt_buy_rule, gt_sell_rule}
if gt_pair_ids.issubset(set(outcomes["rule_id"].unique())):
gt_pair_fires = outcomes[outcomes["rule_id"].isin(gt_pair_ids)]
gt_pair_trades = fires_to_trade_list(sort_fires_chronological(gt_pair_fires))
portfolio_compare["sim_gt_model"] = simulate_sized_portfolio(
gt_pair_trades,
last_price=float(mark) if mark else None,
)
holdout = outcomes[
outcomes["rule_id"].isin(monitor_ids) & (outcomes["split"] == "holdout")
]
if not holdout.empty and not all_monitor.empty:
full_trades = fires_to_trade_list(sort_fires_chronological(all_monitor))
if full_trades:
from deepcoin.ground_truth.gt_allocation import simulate_portfolio_steps
steps = simulate_portfolio_steps(full_trades, use_amount_krw=True)
if steps:
outcomes_ts = outcomes.copy()
outcomes_ts["ts"] = pd.to_datetime(outcomes_ts["dt"])
h0 = outcomes_ts["ts"].quantile(1.0 - MATCH_HOLDOUT_RATIO)
assets = [(s["dt"], float(s["total_asset_krw"])) for s in steps]
pre = [a for d, a in assets if pd.to_datetime(d) < h0]
in_h = [a for d, a in assets if pd.to_datetime(d) >= h0]
asset_start = pre[-1] if pre else float(GT_INITIAL_CASH_KRW)
asset_end = in_h[-1] if in_h else assets[-1][1]
ho_pnl_pct = (
(asset_end - asset_start) / asset_start * 100.0
if asset_start > 0
else 0.0
)
portfolio_compare["sim_sized_holdout"] = {
"initial_asset_krw": round(asset_start, 0),
"final_asset_krw": round(asset_end, 0),
"pnl_krw": round(asset_end - asset_start, 0),
"pnl_pct": round(ho_pnl_pct, 2),
"note": "전기간 복리 후 holdout 구간 자산 증감 (1M 재시작 아님)",
"trade_count": int(len(holdout)),
}
if portfolio_compare.get("sim_sized") and portfolio_compare.get("ground_truth_chrono"):
gt_pnl = float(portfolio_compare["ground_truth_chrono"].get("pnl_pct", 0))
sim_pnl = float(portfolio_compare["sim_sized"].get("pnl_pct", 0))
portfolio_compare["gt_capture_ratio"] = round(
sim_pnl / gt_pnl if abs(gt_pnl) > 1e-6 else 0.0,
4,
)
portfolio_compare["gt_pnl_pct"] = gt_pnl
portfolio_compare["sim_sized_pnl_pct"] = sim_pnl
if portfolio_compare.get("sim_gt_model"):
gtp = float(portfolio_compare["sim_gt_model"].get("pnl_pct", 0))
portfolio_compare["gt_model_capture_ratio"] = round(
gtp / gt_pnl if abs(gt_pnl) > 1e-6 else 0.0,
4,
)
portfolio_compare["gt_allocation_analysis"] = gt_alloc_analysis
portfolio_compare["causal_mode"] = {
"gt_signal_causal": GT_SIGNAL_CAUSAL,
"sim_causal_tier": SIM_CAUSAL_TIER,
"note": "인과적: t 시점까지 데이터만 사용 (운영 정합)",
}
gt_portfolio: dict[str, Any] = {}
if ANALYSIS_GT_CALIBRATION_JSON.is_file():
cal = json.loads(ANALYSIS_GT_CALIBRATION_JSON.read_text(encoding="utf-8"))
gt_portfolio = cal.get("final", {})
else:
from deepcoin.ground_truth.ground_truth import load_ground_truth
from deepcoin.matching.gt_asset_calibration import (
portfolio_asset_ratio,
)
gt_data = load_ground_truth(resolve_ground_truth_file()) or {}
trades = gt_data.get("trades") or []
mark = (gt_data.get("summary") or {}).get("mark_price")
gt_data_cal = load_ground_truth(resolve_ground_truth_file()) or {}
trades = gt_data_cal.get("trades") or []
mark_cal = (gt_data_cal.get("summary") or {}).get("mark_price")
if trades:
gt_portfolio = {
"portfolio": portfolio_asset_ratio(trades, set(), mark),
"portfolio": portfolio_asset_ratio(trades, set(), mark_cal),
"note": "캘리브레이션 미실행 — scripts/04_calibrate_gt_assets.py",
}
summaries = matched.get("all_rule_summaries") or matched.get("monitor_rules") or []
leg_weight_check = summarize_leg_weights(gt_trades) if gt_trades else {}
invalid_legs = [lid for lid, info in leg_weight_check.items() if not info.get("valid", True)]
return {
"label_mode": matched.get("label_mode"),
"train_ratio": MATCH_TRAIN_RATIO,
@@ -389,7 +424,13 @@ def build_simulation_report(
"live_order_cap_sim": live_cap,
"go_no_go": go,
"portfolio_compare": portfolio_compare,
"gt_model": gt_data.get("model"),
"gt_model": gt_data.get("model") or model_to_dict(default_model()),
"gt_weight_policy": weight_policy_summary(default_model()),
"gt_leg_weight_validation": {
"legs": leg_weight_check,
"invalid_leg_ids": invalid_legs,
"all_valid": len(invalid_legs) == 0,
},
"monitor_rules": matched.get("monitor_rules", []),
"gt_portfolio_calibration": gt_portfolio,
"criteria": {
@@ -450,6 +491,24 @@ def run_simulation_report(
)
cal = report.get("gt_portfolio_calibration") or {}
port = cal.get("portfolio") or {}
pc = report.get("portfolio_compare") or {}
if pc.get("gt_capture_ratio") is not None:
print(
f"[시뮬] GT 대비 sim_sized(전기간 복리): {pc.get('sim_sized_pnl_pct')}% "
f"/ GT {pc.get('gt_pnl_pct')}% "
f"(capture={pc.get('gt_capture_ratio'):.2%})"
)
if pc.get("gt_model_capture_ratio") is not None:
print(
f"[시뮬] GT 대비 sim_gt_model: "
f"{pc.get('sim_gt_model', {}).get('pnl_pct')}% "
f"(capture={pc.get('gt_model_capture_ratio'):.2%})"
)
if pc.get("sim_sized", {}).get("max_drawdown_pct") is not None:
print(
f"[시뮬] sim_sized MDD: {pc['sim_sized']['max_drawdown_pct']}% "
f"(GT MDD: {pc.get('ground_truth_chrono', {}).get('max_drawdown_pct')}%)"
)
if port.get("asset_ratio") is not None:
met = cal.get("targets_met", port.get("target_met_90"))
print(