Files
Bithumb/deepcoin/matching/simulation.py
dsyoon 5842cc9fa3 GT 총자산 비율 매수·leg 티어 배분과 시뮬/실거래 포지션 사이징을 통합한다.
타점·비중을 gt_model로 일반화하고, amount_krw 시각순 배분·EV/WF·상위 leg 대형 매수를
position_sizing과 시뮬 HTML(고정 ₩/회 비교)에 반영한다.

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-05-31 16:11:49 +09:00

461 lines
15 KiB
Python

"""
1단계: walk-forward·민감도·실거래 한도 가정 시뮬·Go/No-Go 리포트.
"""
from __future__ import annotations
import json
from pathlib import Path
from typing import Any
import numpy as np
import pandas as pd
from config import (
GT_INITIAL_CASH_KRW,
LIVE_DAILY_KRW_MAX,
LIVE_MAX_TRADES_PER_DAY,
LIVE_ORDER_KRW,
LIVE_SLIPPAGE_PCT,
MATCH_HOLDOUT_RATIO,
MATCH_MIN_EV_VALID,
MATCH_MIN_FIRES_HOLDOUT,
MATCH_MIN_PROFIT_FACTOR,
MATCH_TRAIN_RATIO,
SIM_FEE_STRESS_MULT,
SIM_GO_MIN_HOLDOUT_EV,
SIM_GO_MIN_HOLDOUT_PF,
SIM_GO_WF_POSITIVE_RATIO,
SIM_WALK_FORWARD_MIN_MONTHS,
TRADING_FEE_RATE,
)
from deepcoin.ground_truth.ground_truth import (
load_ground_truth,
order_trades_chronological,
simulate_truth_portfolio,
)
from deepcoin.matching.portfolio_sim import (
fires_to_trade_list,
select_capped_fires,
simulate_fixed_order_portfolio,
simulate_sized_portfolio,
)
from deepcoin.matching.select_rules import _rule_metrics, _split_train_valid_holdout
from deepcoin.paths import resolve_ground_truth_file
from deepcoin.paths import (
ANALYSIS_GT_CALIBRATION_JSON,
MATCHING_FIRE_OUTCOMES,
MATCHING_MATCHED_RULES,
MATCHING_SIMULATION_HTML,
MATCHING_SIMULATION_JSON,
resolve_ground_truth_file,
)
def _fee_adjust_ret(series: pd.Series, mult: float) -> pd.Series:
    """
    Apply a fee-stress penalty to a series of returns.

    The round-trip fee, expressed in percentage points, is charged an extra
    ``mult - 1`` times on top of what the returns already reflect.

    Args:
        series: forward_ret_pct values (percent).
        mult: fee multiplier (2.0 means fees are doubled).
    Returns:
        The returns (percent) with the extra fee subtracted.
    """
    # Fee rate -> both legs of the trade -> percentage points.
    round_trip_fee_pct = TRADING_FEE_RATE * 2 * 100
    penalty = round_trip_fee_pct * (mult - 1.0)
    return series - penalty
def walk_forward_by_month(outcomes: pd.DataFrame) -> list[dict[str, Any]]:
    """
    Aggregate rule metrics per (rule_id, calendar month).

    Args:
        outcomes: fire_outcomes frame; the ``dt``, ``rule_id`` and ``side``
            columns are read here, the rest is handed to ``_rule_metrics``.
    Returns:
        One dict per (rule_id, month) group holding ``rule_id``, ``side``,
        ``month`` and every key returned by ``_rule_metrics``. An empty
        frame yields an empty list.
    """
    if outcomes.empty:
        return []

    # Work on a copy so the caller's frame does not gain helper columns.
    frame = outcomes.copy()
    frame["ts"] = pd.to_datetime(frame["dt"])
    frame["month"] = frame["ts"].dt.to_period("M").astype(str)

    return [
        {
            "rule_id": rule_id,
            # The side of the first fire in the group labels the whole group.
            "side": bucket["side"].iloc[0],
            "month": month_label,
            **_rule_metrics(bucket),
        }
        for (rule_id, month_label), bucket in frame.groupby(["rule_id", "month"])
    ]
def walk_forward_summary(wf_rows: list[dict[str, Any]]) -> dict[str, Any]:
    """
    Summarise, per rule, how many monthly buckets had a positive EV.

    Args:
        wf_rows: output of ``walk_forward_by_month``; each row needs
            ``rule_id`` and ``ev_pct``.
    Returns:
        Mapping rule_id -> {months, positive_months, positive_ratio,
        mean_ev_pct}. Empty input yields an empty dict.
    """
    if not wf_rows:
        return {}

    summary: dict[str, Any] = {}
    for rule_id, monthly in pd.DataFrame(wf_rows).groupby("rule_id"):
        ev = monthly["ev_pct"]
        month_count = len(monthly)
        positive_count = int(ev.gt(0).sum())
        if month_count:
            ratio = round(positive_count / month_count, 4)
        else:
            ratio = 0.0
        summary[rule_id] = {
            "months": month_count,
            "positive_months": positive_count,
            "positive_ratio": ratio,
            "mean_ev_pct": round(float(ev.mean()), 4),
        }
    return summary
def simulate_live_order_cap(
    outcomes: pd.DataFrame,
    *,
    rule_ids: set[str] | None = None,
    holdout_only: bool = True,
) -> dict[str, Any]:
    """
    Replay fires under the live per-day caps and a slippage assumption.

    Fires are visited in ascending ``dt`` order, one calendar day at a time.
    Within a day at most LIVE_MAX_TRADES_PER_DAY fires are taken and the sum
    of buy amounts may not exceed LIVE_DAILY_KRW_MAX. Metrics are then
    computed only on the fires that were taken, after subtracting
    LIVE_SLIPPAGE_PCT from ``forward_ret_pct``.

    Args:
        outcomes: fire_outcomes frame. Columns read here: ``dt``, ``rule_id``,
            ``side``, ``close``, ``forward_ret_pct`` and, optionally, ``split``.
        rule_ids: None keeps every rule; otherwise only these rule ids.
        holdout_only: when True and a ``split`` column exists, keep only rows
            whose split is "holdout".
    Returns:
        ``{"rules": {}, "note": ...}`` when ``outcomes`` is empty,
        ``{"rules": {}, "taken_count": 0}`` when no fire was taken, otherwise
        a dict with ``assumptions``, ``taken_count``, ``total_count``, per-rule
        ``rules`` and ``portfolio_adj_ev_pct``.
    """
    if outcomes.empty:
        return {"rules": {}, "note": "발화 없음"}
    df = outcomes
    if holdout_only and "split" in df.columns:
        df = df[df["split"] == "holdout"]
    if rule_ids is not None:
        df = df[df["rule_id"].isin(rule_ids)]
    # Sort once so that iteration inside each day follows fire time.
    df = df.sort_values("dt").copy()
    df["ts"] = pd.to_datetime(df["dt"])
    df["day"] = df["ts"].dt.date.astype(str)
    slip = LIVE_SLIPPAGE_PCT
    taken_rows: list[pd.DataFrame] = []
    # Function-scope import of the sizing helpers.
    # NOTE(review): presumably deferred to avoid an import cycle — confirm.
    from deepcoin.matching.position_sizing import (
        compute_buy_amount_krw,
        live_buy_asset_pct_scale,
        load_sizing_context_from_gt,
    )
    gt_trades, large_legs, approved = load_sizing_context_from_gt()
    # cash / qty persist across days; spent / n_trades below reset every day.
    cash = float(GT_INITIAL_CASH_KRW)
    qty = 0.0
    for day, day_grp in df.groupby("day", sort=True):
        spent = 0.0
        n_trades = 0
        taken_idx: list[int] = []
        for idx, row in day_grp.iterrows():
            # Daily trade-count cap reached: drop the rest of this day.
            if n_trades >= LIVE_MAX_TRADES_PER_DAY:
                break
            side = row["side"]
            price = float(row["close"])
            if side == "buy":
                # Buy size comes from the sizing helpers using current cash/qty.
                # NOTE(review): the meaning of the two positional 1.0 arguments
                # is defined in position_sizing and is not visible here.
                scale = live_buy_asset_pct_scale(
                    str(row["rule_id"]),
                    str(row["dt"]),
                    gt_trades,
                    approved_rules=approved,
                    large_legs=large_legs,
                )
                planned = compute_buy_amount_krw(
                    cash, qty, price, 1.0, 1.0, asset_pct_scale=scale
                )
            else:
                # Assigned for non-buy fires but not read again on that path.
                planned = float(LIVE_ORDER_KRW)
            if side == "buy":
                # A non-positive size skips the fire without counting it.
                if planned <= 0:
                    continue
                # Exceeding the daily KRW budget ends the day; later fires of
                # the same day are not considered even if they would be smaller.
                if spent + planned > LIVE_DAILY_KRW_MAX:
                    break
                fee = planned * TRADING_FEE_RATE
                cash -= planned + fee
                qty += planned / price if price > 0 else 0.0
                spent += planned
            elif side == "sell" and qty > 0:
                # Liquidate the whole position at this row's close, net of fee.
                gross = qty * price
                cash += gross * (1.0 - TRADING_FEE_RATE)
                qty = 0.0
            # NOTE(review): a sell fire with no open position changes nothing
            # above but is still counted as taken here — confirm this is intended.
            n_trades += 1
            taken_idx.append(idx)
        if taken_idx:
            taken_rows.append(day_grp.loc[taken_idx])
    if not taken_rows:
        return {"rules": {}, "taken_count": 0}
    taken = pd.concat(taken_rows, ignore_index=True)
    taken["adj_ret_pct"] = taken["forward_ret_pct"] - slip
    by_rule: dict[str, Any] = {}
    for rid, grp in taken.groupby("rule_id"):
        # _rule_metrics is fed the slippage-adjusted return under the
        # forward_ret_pct column name.
        g = grp.copy()
        g["forward_ret_pct"] = g["adj_ret_pct"]
        by_rule[rid] = {
            "taken_count": int(len(grp)),
            # Fires of this rule after the holdout / rule_ids filters.
            "total_count": int((df["rule_id"] == rid).sum()),
            "metrics": _rule_metrics(g),
        }
    return {
        "assumptions": {
            "order_krw": LIVE_ORDER_KRW,
            "daily_krw_max": LIVE_DAILY_KRW_MAX,
            "slippage_pct": slip,
            "sizing": "total_asset_pct_ev_wf_large_leg",
        },
        "taken_count": int(len(taken)),
        "total_count": int(len(df)),
        "rules": by_rule,
        "portfolio_adj_ev_pct": round(float(taken["adj_ret_pct"].mean()), 4),
    }
def evaluate_go_no_go(
    matched: dict[str, Any],
    wf_summary: dict[str, Any],
    fee_stress: dict[str, Any],
    live_cap: dict[str, Any],
) -> dict[str, Any]:
    """
    Decide Go/No-Go from holdout, walk-forward and fee-stress checks.

    Rules are taken from ``matched["monitor_rules"]``, falling back to
    ``matched["selected"]``. The overall verdict is Go only when at least one
    rule was evaluated and every evaluated rule passed all three checks.

    Args:
        matched: contents of matched_rules.json.
        wf_summary: output of ``walk_forward_summary``.
        fee_stress: per-rule metrics under fee stress (``ev_pct`` is read).
        live_cap: output of ``simulate_live_order_cap``.
    Returns:
        Dict with ``go``, per-rule ``checks`` and ``live_cap_taken_ratio``.
    """
    candidates = matched.get("monitor_rules") or matched.get("selected") or []
    verdicts: list[dict[str, Any]] = []

    for rule in candidates:
        rule_id = rule["rule_id"]
        holdout = rule.get("metrics", {}).get("holdout", {})
        # Missing metrics fall back to values that can never pass.
        holdout_ev = float(holdout.get("ev_pct", -999))
        holdout_pf = float(holdout.get("profit_factor", 0))
        wf_entry = wf_summary.get(rule_id, {})
        positive_ratio = float(wf_entry.get("positive_ratio", 0))
        month_count = int(wf_entry.get("months", 0))
        stressed_ev = fee_stress.get(rule_id, {}).get("ev_pct", -999)

        holdout_ok = (
            holdout_ev >= SIM_GO_MIN_HOLDOUT_EV
            and holdout_pf >= SIM_GO_MIN_HOLDOUT_PF
        )
        wf_ok = (
            month_count >= SIM_WALK_FORWARD_MIN_MONTHS
            and positive_ratio >= SIM_GO_WF_POSITIVE_RATIO
        )
        # The stressed EV is held to the same floor as the holdout EV.
        fee_ok = stressed_ev >= SIM_GO_MIN_HOLDOUT_EV

        verdicts.append(
            {
                "rule_id": rule_id,
                "side": rule.get("side"),
                "pass": holdout_ok and wf_ok and fee_ok,
                "holdout_ev": holdout_ev,
                "holdout_pf": holdout_pf,
                "wf_positive_ratio": positive_ratio,
                "fee_stress_ev": stressed_ev,
            }
        )

    taken = live_cap.get("taken_count", 0)
    # Clamp the denominator so an empty simulation yields 0 instead of dividing by zero.
    total = max(live_cap.get("total_count", 1), 1)
    return {
        "go": len(verdicts) > 0 and all(v["pass"] for v in verdicts),
        "checks": verdicts,
        "live_cap_taken_ratio": round(taken / total, 4),
    }
def build_simulation_report(
outcomes_path: Path | None = None,
matched_path: Path | None = None,
) -> dict[str, Any]:
"""
시뮬레이션 리포트 dict 생성.
Args:
outcomes_path: fire_outcomes.csv.
matched_path: matched_rules.json.
Returns:
simulation_report 전체 dict.
"""
op = outcomes_path or MATCHING_FIRE_OUTCOMES
mp = matched_path or MATCHING_MATCHED_RULES
if not op.is_file():
raise FileNotFoundError(f"fire_outcomes 없음: {op} — 04_match_rules.py 먼저 실행")
outcomes = pd.read_csv(op)
matched: dict[str, Any] = {}
if mp.is_file():
matched = json.loads(mp.read_text(encoding="utf-8"))
outcomes["split"] = _split_train_valid_holdout(outcomes)
wf_rows = walk_forward_by_month(outcomes)
wf_sum = walk_forward_summary(wf_rows)
fee_stress: dict[str, Any] = {}
for rid in outcomes["rule_id"].unique():
sub = outcomes[outcomes["rule_id"] == rid]
adj = _fee_adjust_ret(sub["forward_ret_pct"], SIM_FEE_STRESS_MULT)
fee_stress[rid] = _rule_metrics(
sub.assign(forward_ret_pct=adj)
)
monitor_ids = {r["rule_id"] for r in matched.get("monitor_rules", [])}
live_cap = simulate_live_order_cap(
outcomes, rule_ids=monitor_ids, holdout_only=True
)
go = evaluate_go_no_go(matched, wf_sum, fee_stress, live_cap)
portfolio_compare: dict[str, Any] = {}
gt_data = load_ground_truth(resolve_ground_truth_file()) or {}
gt_trades = gt_data.get("trades") or []
mark = (gt_data.get("summary") or {}).get("mark_price")
if gt_trades:
portfolio_compare["ground_truth_chrono"] = simulate_truth_portfolio(
order_trades_chronological(gt_trades),
last_price=float(mark) if mark else None,
)
holdout = outcomes[
outcomes["rule_id"].isin(monitor_ids) & (outcomes["split"] == "holdout")
]
capped = select_capped_fires(holdout)
if not capped.empty:
portfolio_compare["sim_sized"] = simulate_sized_portfolio(
fires_to_trade_list(capped, apply_dynamic_sizing=True),
last_price=float(mark) if mark else None,
)
portfolio_compare["sim_fixed_order"] = simulate_fixed_order_portfolio(
fires_to_trade_list(capped, apply_dynamic_sizing=False),
last_price=float(mark) if mark else None,
)
gt_portfolio: dict[str, Any] = {}
if ANALYSIS_GT_CALIBRATION_JSON.is_file():
cal = json.loads(ANALYSIS_GT_CALIBRATION_JSON.read_text(encoding="utf-8"))
gt_portfolio = cal.get("final", {})
else:
from deepcoin.ground_truth.ground_truth import load_ground_truth
from deepcoin.matching.gt_asset_calibration import (
portfolio_asset_ratio,
)
gt_data = load_ground_truth(resolve_ground_truth_file()) or {}
trades = gt_data.get("trades") or []
mark = (gt_data.get("summary") or {}).get("mark_price")
if trades:
gt_portfolio = {
"portfolio": portfolio_asset_ratio(trades, set(), mark),
"note": "캘리브레이션 미실행 — scripts/04_calibrate_gt_assets.py",
}
summaries = matched.get("all_rule_summaries") or matched.get("monitor_rules") or []
return {
"label_mode": matched.get("label_mode"),
"train_ratio": MATCH_TRAIN_RATIO,
"holdout_ratio": MATCH_HOLDOUT_RATIO,
"outcomes_rows": int(len(outcomes)),
"walk_forward": wf_rows,
"walk_forward_summary": wf_sum,
"fee_stress_mult": SIM_FEE_STRESS_MULT,
"fee_stress_by_rule": fee_stress,
"live_order_cap_sim": live_cap,
"go_no_go": go,
"portfolio_compare": portfolio_compare,
"gt_model": gt_data.get("model"),
"monitor_rules": matched.get("monitor_rules", []),
"gt_portfolio_calibration": gt_portfolio,
"criteria": {
"min_holdout_ev": SIM_GO_MIN_HOLDOUT_EV,
"min_holdout_pf": SIM_GO_MIN_HOLDOUT_PF,
"wf_positive_ratio": SIM_GO_WF_POSITIVE_RATIO,
"wf_min_months": SIM_WALK_FORWARD_MIN_MONTHS,
},
}
def write_simulation_html(report: dict[str, Any], out_path: Path) -> Path:
    """
    Render the simulation report to HTML.

    Delegates to ``simulation_html.write_simulation_report_html``.

    Args:
        report: result of ``build_simulation_report``.
        out_path: destination HTML path.
    Returns:
        Whatever the renderer returns (the written path).
    """
    from deepcoin.matching.simulation_html import (
        write_simulation_report_html as render_report_html,
    )

    written_path = render_report_html(report, out_path)
    return written_path
def run_simulation_report(
    outcomes_path: Path | None = None,
    matched_path: Path | None = None,
) -> dict[str, Any]:
    """
    Build the simulation report, persist it as JSON and HTML, print a summary.

    Args:
        outcomes_path: fire_outcomes CSV, forwarded to ``build_simulation_report``.
        matched_path: matched_rules JSON, forwarded to ``build_simulation_report``.
    Returns:
        The report dict.
    """
    report = build_simulation_report(outcomes_path, matched_path)

    # Persist JSON first, then the HTML rendering of the same report.
    MATCHING_SIMULATION_JSON.parent.mkdir(parents=True, exist_ok=True)
    payload = json.dumps(report, ensure_ascii=False, indent=2)
    MATCHING_SIMULATION_JSON.write_text(payload, encoding="utf-8")
    write_simulation_html(report, MATCHING_SIMULATION_HTML)

    is_go = report["go_no_go"]["go"]
    print(f"[시뮬] 저장: {MATCHING_SIMULATION_JSON}")
    print(f"[시뮬] 저장: {MATCHING_SIMULATION_HTML}")
    print(f"[시뮬] Go/No-Go: {'GO' if is_go else 'NO-GO'}")

    # One line per evaluated rule.
    for check in report["go_no_go"].get("checks", []):
        flag = "OK" if check["pass"] else "NG"
        print(
            f" [{flag}] {check['rule_id']}: holdout EV={check['holdout_ev']} "
            f"WF+={check['wf_positive_ratio']} fee2x EV={check['fee_stress_ev']}"
        )

    calibration = report.get("gt_portfolio_calibration") or {}
    portfolio = calibration.get("portfolio") or {}
    if portfolio.get("asset_ratio") is None:
        return report

    # Prefer the calibration-level flag, falling back to the portfolio one.
    target_met = calibration.get("targets_met", portfolio.get("target_met_90"))
    print(
        f"[시뮬] GT 총자산 대비 leg subset 비율: {portfolio['asset_ratio']:.2%} "
        f"({portfolio.get('legs_covered')}/{portfolio.get('legs_total')} leg) "
        f"목표90%={'달성' if target_met else '미달'}"
    )
    return report