""" 1단계: walk-forward·민감도·실거래 한도 가정 시뮬·Go/No-Go 리포트. """ from __future__ import annotations import json from pathlib import Path from typing import Any import numpy as np import pandas as pd from config import ( GT_INITIAL_CASH_KRW, LIVE_DAILY_KRW_MAX, LIVE_MAX_TRADES_PER_DAY, LIVE_ORDER_KRW, LIVE_SLIPPAGE_PCT, MATCH_HOLDOUT_RATIO, MATCH_MIN_EV_VALID, MATCH_MIN_FIRES_HOLDOUT, MATCH_MIN_PROFIT_FACTOR, MATCH_TRAIN_RATIO, SIM_FEE_STRESS_MULT, SIM_GO_MIN_HOLDOUT_EV, SIM_GO_MIN_HOLDOUT_PF, SIM_GO_WF_POSITIVE_RATIO, SIM_WALK_FORWARD_MIN_MONTHS, TRADING_FEE_RATE, ) from deepcoin.ground_truth.ground_truth import ( load_ground_truth, order_trades_chronological, simulate_truth_portfolio, ) from deepcoin.matching.portfolio_sim import ( fires_to_trade_list, select_capped_fires, simulate_fixed_order_portfolio, simulate_sized_portfolio, ) from deepcoin.matching.select_rules import _rule_metrics, _split_train_valid_holdout from deepcoin.paths import resolve_ground_truth_file from deepcoin.paths import ( ANALYSIS_GT_CALIBRATION_JSON, MATCHING_FIRE_OUTCOMES, MATCHING_MATCHED_RULES, MATCHING_SIMULATION_HTML, MATCHING_SIMULATION_JSON, resolve_ground_truth_file, ) def _fee_adjust_ret(series: pd.Series, mult: float) -> pd.Series: """ 수수료 스트레스: 왕복 수수료 %p를 (mult-1)배 추가 차감. Args: series: forward_ret_pct. mult: 수수료 배수 (2.0 = 2배). Returns: 조정된 수익률 %. """ extra = TRADING_FEE_RATE * 2 * 100 * (mult - 1.0) return series - extra def walk_forward_by_month(outcomes: pd.DataFrame) -> list[dict[str, Any]]: """ 규칙·월별 EV·PF 집계. Args: outcomes: fire_outcomes. Returns: 월별 행 dict 리스트. """ if outcomes.empty: return [] df = outcomes.copy() df["ts"] = pd.to_datetime(df["dt"]) df["month"] = df["ts"].dt.to_period("M").astype(str) rows: list[dict[str, Any]] = [] for (rid, month), grp in df.groupby(["rule_id", "month"]): m = _rule_metrics(grp) rows.append( { "rule_id": rid, "side": grp["side"].iloc[0], "month": month, **m, } ) return rows def walk_forward_summary(wf_rows: list[dict[str, Any]]) -> dict[str, Any]: """ 규칙별 월별 EV 양수 비율 요약. Args: wf_rows: walk_forward_by_month 결과. Returns: rule_id → {positive_ratio, months, ...}. """ if not wf_rows: return {} df = pd.DataFrame(wf_rows) out: dict[str, Any] = {} for rid, grp in df.groupby("rule_id"): n = len(grp) pos = int((grp["ev_pct"] > 0).sum()) out[rid] = { "months": n, "positive_months": pos, "positive_ratio": round(pos / n, 4) if n else 0.0, "mean_ev_pct": round(float(grp["ev_pct"].mean()), 4), } return out def simulate_live_order_cap( outcomes: pd.DataFrame, *, rule_ids: set[str] | None = None, holdout_only: bool = True, ) -> dict[str, Any]: """ 1회·일 한도·슬리피지 가정으로 체결 가능한 발화만 집계. Args: outcomes: fire_outcomes (split 컬럼 있으면 holdout 필터 가능). rule_ids: None이면 전 규칙, 지정 시 해당 rule만. holdout_only: True면 split==holdout 만. Returns: 규칙별·전체 요약. """ if outcomes.empty: return {"rules": {}, "note": "발화 없음"} df = outcomes if holdout_only and "split" in df.columns: df = df[df["split"] == "holdout"] if rule_ids is not None: df = df[df["rule_id"].isin(rule_ids)] df = df.sort_values("dt").copy() df["ts"] = pd.to_datetime(df["dt"]) df["day"] = df["ts"].dt.date.astype(str) slip = LIVE_SLIPPAGE_PCT taken_rows: list[pd.DataFrame] = [] from deepcoin.matching.position_sizing import ( compute_buy_amount_krw, live_buy_asset_pct_scale, load_sizing_context_from_gt, ) gt_trades, large_legs, approved = load_sizing_context_from_gt() cash = float(GT_INITIAL_CASH_KRW) qty = 0.0 for day, day_grp in df.groupby("day", sort=True): spent = 0.0 n_trades = 0 taken_idx: list[int] = [] for idx, row in day_grp.iterrows(): if n_trades >= LIVE_MAX_TRADES_PER_DAY: break side = row["side"] price = float(row["close"]) if side == "buy": scale = live_buy_asset_pct_scale( str(row["rule_id"]), str(row["dt"]), gt_trades, approved_rules=approved, large_legs=large_legs, ) planned = compute_buy_amount_krw( cash, qty, price, 1.0, 1.0, asset_pct_scale=scale ) else: planned = float(LIVE_ORDER_KRW) if side == "buy": if planned <= 0: continue if spent + planned > LIVE_DAILY_KRW_MAX: break fee = planned * TRADING_FEE_RATE cash -= planned + fee qty += planned / price if price > 0 else 0.0 spent += planned elif side == "sell" and qty > 0: gross = qty * price cash += gross * (1.0 - TRADING_FEE_RATE) qty = 0.0 n_trades += 1 taken_idx.append(idx) if taken_idx: taken_rows.append(day_grp.loc[taken_idx]) if not taken_rows: return {"rules": {}, "taken_count": 0} taken = pd.concat(taken_rows, ignore_index=True) taken["adj_ret_pct"] = taken["forward_ret_pct"] - slip by_rule: dict[str, Any] = {} for rid, grp in taken.groupby("rule_id"): g = grp.copy() g["forward_ret_pct"] = g["adj_ret_pct"] by_rule[rid] = { "taken_count": int(len(grp)), "total_count": int((df["rule_id"] == rid).sum()), "metrics": _rule_metrics(g), } return { "assumptions": { "order_krw": LIVE_ORDER_KRW, "daily_krw_max": LIVE_DAILY_KRW_MAX, "slippage_pct": slip, "sizing": "total_asset_pct_ev_wf_large_leg", }, "taken_count": int(len(taken)), "total_count": int(len(df)), "rules": by_rule, "portfolio_adj_ev_pct": round(float(taken["adj_ret_pct"].mean()), 4), } def evaluate_go_no_go( matched: dict[str, Any], wf_summary: dict[str, Any], fee_stress: dict[str, Any], live_cap: dict[str, Any], ) -> dict[str, Any]: """ monitor_rules·holdout·walk-forward·수수료 스트레스 기준 Go/No-Go. Args: matched: matched_rules.json 내용. wf_summary: walk_forward_summary. fee_stress: 규칙별 fee 2x EV. live_cap: simulate_live_order_cap. Returns: go, checks, monitor_rules 판정. """ rules = matched.get("monitor_rules") or matched.get("selected") or [] checks: list[dict[str, Any]] = [] all_go = True for rule in rules: rid = rule["rule_id"] h = rule.get("metrics", {}).get("holdout", {}) ev_h = float(h.get("ev_pct", -999)) pf_h = float(h.get("profit_factor", 0)) wf = wf_summary.get(rid, {}) wf_ratio = float(wf.get("positive_ratio", 0)) wf_months = int(wf.get("months", 0)) stress_ev = fee_stress.get(rid, {}).get("ev_pct", -999) c_holdout = ev_h >= SIM_GO_MIN_HOLDOUT_EV and pf_h >= SIM_GO_MIN_HOLDOUT_PF c_wf = wf_months >= SIM_WALK_FORWARD_MIN_MONTHS and wf_ratio >= SIM_GO_WF_POSITIVE_RATIO c_fee = stress_ev >= SIM_GO_MIN_HOLDOUT_EV ok = c_holdout and c_wf and c_fee if not ok: all_go = False checks.append( { "rule_id": rid, "side": rule.get("side"), "pass": ok, "holdout_ev": ev_h, "holdout_pf": pf_h, "wf_positive_ratio": wf_ratio, "fee_stress_ev": stress_ev, } ) return { "go": all_go and len(checks) > 0, "checks": checks, "live_cap_taken_ratio": round( live_cap.get("taken_count", 0) / max(live_cap.get("total_count", 1), 1), 4, ), } def build_simulation_report( outcomes_path: Path | None = None, matched_path: Path | None = None, ) -> dict[str, Any]: """ 시뮬레이션 리포트 dict 생성. Args: outcomes_path: fire_outcomes.csv. matched_path: matched_rules.json. Returns: simulation_report 전체 dict. """ op = outcomes_path or MATCHING_FIRE_OUTCOMES mp = matched_path or MATCHING_MATCHED_RULES if not op.is_file(): raise FileNotFoundError(f"fire_outcomes 없음: {op} — 04_match_rules.py 먼저 실행") outcomes = pd.read_csv(op) matched: dict[str, Any] = {} if mp.is_file(): matched = json.loads(mp.read_text(encoding="utf-8")) outcomes["split"] = _split_train_valid_holdout(outcomes) wf_rows = walk_forward_by_month(outcomes) wf_sum = walk_forward_summary(wf_rows) fee_stress: dict[str, Any] = {} for rid in outcomes["rule_id"].unique(): sub = outcomes[outcomes["rule_id"] == rid] adj = _fee_adjust_ret(sub["forward_ret_pct"], SIM_FEE_STRESS_MULT) fee_stress[rid] = _rule_metrics( sub.assign(forward_ret_pct=adj) ) monitor_ids = {r["rule_id"] for r in matched.get("monitor_rules", [])} live_cap = simulate_live_order_cap( outcomes, rule_ids=monitor_ids, holdout_only=True ) go = evaluate_go_no_go(matched, wf_sum, fee_stress, live_cap) portfolio_compare: dict[str, Any] = {} gt_data = load_ground_truth(resolve_ground_truth_file()) or {} gt_trades = gt_data.get("trades") or [] mark = (gt_data.get("summary") or {}).get("mark_price") if gt_trades: portfolio_compare["ground_truth_chrono"] = simulate_truth_portfolio( order_trades_chronological(gt_trades), last_price=float(mark) if mark else None, ) holdout = outcomes[ outcomes["rule_id"].isin(monitor_ids) & (outcomes["split"] == "holdout") ] capped = select_capped_fires(holdout) if not capped.empty: portfolio_compare["sim_sized"] = simulate_sized_portfolio( fires_to_trade_list(capped, apply_dynamic_sizing=True), last_price=float(mark) if mark else None, ) portfolio_compare["sim_fixed_order"] = simulate_fixed_order_portfolio( fires_to_trade_list(capped, apply_dynamic_sizing=False), last_price=float(mark) if mark else None, ) gt_portfolio: dict[str, Any] = {} if ANALYSIS_GT_CALIBRATION_JSON.is_file(): cal = json.loads(ANALYSIS_GT_CALIBRATION_JSON.read_text(encoding="utf-8")) gt_portfolio = cal.get("final", {}) else: from deepcoin.ground_truth.ground_truth import load_ground_truth from deepcoin.matching.gt_asset_calibration import ( portfolio_asset_ratio, ) gt_data = load_ground_truth(resolve_ground_truth_file()) or {} trades = gt_data.get("trades") or [] mark = (gt_data.get("summary") or {}).get("mark_price") if trades: gt_portfolio = { "portfolio": portfolio_asset_ratio(trades, set(), mark), "note": "캘리브레이션 미실행 — scripts/04_calibrate_gt_assets.py", } summaries = matched.get("all_rule_summaries") or matched.get("monitor_rules") or [] return { "label_mode": matched.get("label_mode"), "train_ratio": MATCH_TRAIN_RATIO, "holdout_ratio": MATCH_HOLDOUT_RATIO, "outcomes_rows": int(len(outcomes)), "walk_forward": wf_rows, "walk_forward_summary": wf_sum, "fee_stress_mult": SIM_FEE_STRESS_MULT, "fee_stress_by_rule": fee_stress, "live_order_cap_sim": live_cap, "go_no_go": go, "portfolio_compare": portfolio_compare, "gt_model": gt_data.get("model"), "monitor_rules": matched.get("monitor_rules", []), "gt_portfolio_calibration": gt_portfolio, "criteria": { "min_holdout_ev": SIM_GO_MIN_HOLDOUT_EV, "min_holdout_pf": SIM_GO_MIN_HOLDOUT_PF, "wf_positive_ratio": SIM_GO_WF_POSITIVE_RATIO, "wf_min_months": SIM_WALK_FORWARD_MIN_MONTHS, }, } def write_simulation_html(report: dict[str, Any], out_path: Path) -> Path: """ simulation_report.html 저장 (ground_truth 차트 동일 스타일). Args: report: build_simulation_report 결과. out_path: HTML 경로. Returns: out_path. """ from deepcoin.matching.simulation_html import write_simulation_report_html return write_simulation_report_html(report, out_path) def run_simulation_report( outcomes_path: Path | None = None, matched_path: Path | None = None, ) -> dict[str, Any]: """ 시뮬 리포트 생성·저장·요약 출력. Args: outcomes_path: fire_outcomes.csv. matched_path: matched_rules.json. Returns: report dict. """ report = build_simulation_report(outcomes_path, matched_path) MATCHING_SIMULATION_JSON.parent.mkdir(parents=True, exist_ok=True) MATCHING_SIMULATION_JSON.write_text( json.dumps(report, ensure_ascii=False, indent=2), encoding="utf-8", ) write_simulation_html(report, MATCHING_SIMULATION_HTML) go = report["go_no_go"]["go"] print(f"[시뮬] 저장: {MATCHING_SIMULATION_JSON}") print(f"[시뮬] 저장: {MATCHING_SIMULATION_HTML}") print(f"[시뮬] Go/No-Go: {'GO' if go else 'NO-GO'}") for c in report["go_no_go"].get("checks", []): mark = "OK" if c["pass"] else "NG" print( f" [{mark}] {c['rule_id']}: holdout EV={c['holdout_ev']} " f"WF+={c['wf_positive_ratio']} fee2x EV={c['fee_stress_ev']}" ) cal = report.get("gt_portfolio_calibration") or {} port = cal.get("portfolio") or {} if port.get("asset_ratio") is not None: met = cal.get("targets_met", port.get("target_met_90")) print( f"[시뮬] GT 총자산 대비 leg subset 비율: {port['asset_ratio']:.2%} " f"({port.get('legs_covered')}/{port.get('legs_total')} leg) " f"목표90%={'달성' if met else '미달'}" ) return report