GT MTF 프로필·캘리브레이션과 04 매칭/시뮬/실거래 파이프라인을 추가한다.

3분~일봉 GT 타점 분석(03c), leg 체결 순서 수정, 총자산 90% 검증 루프, walk-forward Go/No-Go 시뮬, monitor·live_trader 및 reference 문서를 포함한다.

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-05-31 11:27:50 +09:00
parent b52d61b777
commit 2cb67c42b3
47 changed files with 5956 additions and 209 deletions
--- a/deepcoin/matching/profile_rules.py
+++ b/deepcoin/matching/profile_rules.py
@@ -0,0 +1,418 @@
+"""
+04-1: GT 스냅샷(03b)에서 규칙 후보 생성.
+"""
+
+from __future__ import annotations
+
+import json
+from pathlib import Path
+from typing import Any
+
+import numpy as np
+import pandas as pd
+
+from config import (
+    MATCH_INCLUDE_ATOMIC,
+    MATCH_INCLUDE_MTF_CROSS,
+    MATCH_INCLUDE_WIDE_RULES,
+    MATCH_PROFILE_QUANTILE_HI,
+    MATCH_PROFILE_QUANTILE_LO,
+    MATCH_PROFILE_TIGHT_HI,
+    MATCH_PROFILE_TIGHT_LO,
+)
+from deepcoin.analysis.general_analysis_config import GENERAL_ANALYSIS_INTERVALS
+from deepcoin.analysis.general_analysis_core import interval_tf_prefix
+from deepcoin.matching.config import (
+    ANALYSIS_TRADES_CSV,
+    BUY_PROFILE_FEATURES,
+    SELL_PROFILE_FEATURES,
+)
+from deepcoin.matching.gt_mtf_profile import (
+    analyze_gt_mtf_profile,
+    load_selected_features,
+)
+from deepcoin.paths import (
+    ANALYSIS_GT_CALIBRATION_JSON,
+    ANALYSIS_GT_MTF_PROFILE_JSON,
+)
+
+
+def _feature_separation(
+    buy: pd.DataFrame,
+    sell: pd.DataFrame,
+    col: str,
+) -> float:
+    """
+    Score how strongly one column separates the buy and sell GT distributions.
+
+    The score is the absolute difference of the two means divided by the
+    pooled standard deviation, sqrt((var_buy + var_sell) / 2). When the pooled
+    deviation is effectively zero (< 1e-9) the raw absolute mean difference is
+    returned instead, which avoids a division by zero.
+
+    Args:
+        buy: Rows for buy entry points.
+        sell: Rows for sell entry points.
+        col: Name of the column to score.
+
+    Returns:
+        Separation score. 0.0 when the column is missing from either frame,
+        is not numeric in ``buy``, or either side has fewer than 5 usable
+        (non-NaN) values.
+    """
+    # Guard both frames: a column present on only one side cannot be compared
+    # and must score 0 rather than raise KeyError on the lookup below.
+    if col not in buy.columns or col not in sell.columns:
+        return 0.0
+    if not pd.api.types.is_numeric_dtype(buy[col]):
+        return 0.0
+    a = pd.to_numeric(buy[col], errors="coerce").dropna()
+    b = pd.to_numeric(sell[col], errors="coerce").dropna()
+    if len(a) < 5 or len(b) < 5:
+        return 0.0
+    mean_gap = abs(float(a.mean() - b.mean()))
+    pooled = float(np.sqrt((a.var() + b.var()) / 2))
+    if pooled < 1e-9:
+        return mean_gap
+    # Plain float on every path so the score is safe to sort and serialize.
+    return float(mean_gap / pooled)
+
+
+def _condition_from_series(series: pd.Series, side: str) -> dict[str, Any] | None:
+    """
+    Derive a single rule condition from one column of GT values.
+
+    Text columns yield an ``eq`` test against the most frequent value, 0/1
+    flag columns yield an ``eq_int`` test when one value clearly dominates,
+    and other numeric columns yield a ``between`` band spanning the
+    MATCH_PROFILE_QUANTILE_LO..MATCH_PROFILE_QUANTILE_HI quantiles.
+
+    Args:
+        series: Values of one column for the given side's entry points.
+        side: "buy" or "sell"; informational only, not used by the logic.
+
+    Returns:
+        Condition dict, or None when no usable condition can be derived
+        (no text mode, fewer than 10 numeric values, an undecided flag, or a
+        degenerate quantile band).
+    """
+    col_name = series.name
+    # Recognise every string-like dtype, not only ``object`` / "string".
+    # A string dtype reported under another name would otherwise fall into the
+    # numeric path, be coerced to NaN and silently produce no condition.
+    if series.dtype == object or pd.api.types.is_string_dtype(series.dtype):
+        mode = series.dropna().astype(str).mode()
+        if mode.empty:
+            return None
+        return {"col": col_name, "op": "eq", "value": str(mode.iloc[0])}
+
+    s = pd.to_numeric(series, errors="coerce").dropna()
+    if len(s) < 10:
+        return None
+
+    # Binary flag column: require a clear majority (>= 55% or <= 45% ones);
+    # anything in between is treated as uninformative.
+    if set(s.unique()).issubset({0, 1}):
+        frac = float(s.mean())
+        if frac >= 0.55:
+            return {"col": col_name, "op": "eq_int", "value": 1}
+        if frac <= 0.45:
+            return {"col": col_name, "op": "eq_int", "value": 0}
+        return None
+
+    lo = float(s.quantile(MATCH_PROFILE_QUANTILE_LO))
+    hi = float(s.quantile(MATCH_PROFILE_QUANTILE_HI))
+    # An empty or inverted band carries no information.
+    if lo >= hi:
+        return None
+    return {"col": col_name, "op": "between", "lo": lo, "hi": hi}
+
+
+def _condition_tight(series: pd.Series) -> dict[str, Any] | None:
+    """
+    Build a narrow ``between`` condition from the configured tight quantiles.
+
+    The band runs from the MATCH_PROFILE_TIGHT_LO quantile to the
+    MATCH_PROFILE_TIGHT_HI quantile of the numeric values (the original note
+    describes this as q35~q65 — confirm against the config values).
+
+    Args:
+        series: Values of one column for a GT subset.
+
+    Returns:
+        ``between`` condition dict, or None when fewer than 10 numeric values
+        remain or the band is empty/inverted.
+    """
+    values = pd.to_numeric(series, errors="coerce").dropna()
+    enough_samples = len(values) >= 10
+    if not enough_samples:
+        return None
+    lower = float(values.quantile(MATCH_PROFILE_TIGHT_LO))
+    upper = float(values.quantile(MATCH_PROFILE_TIGHT_HI))
+    if lower >= upper:
+        return None
+    return dict(col=series.name, op="between", lo=lower, hi=upper)
+
+
+def _contrast_conditions(
+    buy: pd.DataFrame,
+    sell: pd.DataFrame,
+    col: str,
+    side: str,
+) -> list[dict[str, Any]]:
+    """
+    Build a tight band plus a contrast bound for a column that separates sides.
+
+    The first condition is the tight quantile band of the requested side's own
+    values. When the buy median lies below the sell median a second condition
+    is added that pushes away from the opposite side: buy rules require the
+    value to be at or below the sell 40th percentile, sell rules require it to
+    be at or above the buy 60th percentile.
+
+    Args:
+        buy: Buy GT rows.
+        sell: Sell GT rows.
+        col: Column name.
+        side: "buy" or "sell".
+
+    Returns:
+        List of conditions; empty when the column is missing from either
+        frame, is not numeric in ``buy``, either side has fewer than 10
+        numeric values, or no tight band can be derived. May contain only the
+        tight band when no contrast bound applies.
+    """
+    # Guard both frames so a column present on only one side yields no rule
+    # instead of raising KeyError on the lookup below.
+    if col not in buy.columns or col not in sell.columns:
+        return []
+    if not pd.api.types.is_numeric_dtype(buy[col]):
+        return []
+    buy_vals = pd.to_numeric(buy[col], errors="coerce").dropna()
+    sell_vals = pd.to_numeric(sell[col], errors="coerce").dropna()
+    if len(buy_vals) < 10 or len(sell_vals) < 10:
+        return []
+    tight = _condition_tight(buy_vals if side == "buy" else sell_vals)
+    if tight is None:
+        return []
+    conds = [tight]
+    # NOTE(review): a contrast bound is only emitted when the buy median is
+    # below the sell median. The mirrored case (buy median above sell median)
+    # adds nothing, so callers requiring >= 2 conditions get no contrast rule
+    # for such columns — confirm whether that asymmetry is intended.
+    if float(buy_vals.median()) < float(sell_vals.median()):
+        if side == "buy":
+            conds.append({"col": col, "op": "lte", "value": float(sell_vals.quantile(0.40))})
+        elif side == "sell":
+            conds.append({"col": col, "op": "gte", "value": float(buy_vals.quantile(0.60))})
+    return conds
+
+
+def _resolve_profile_features(
+    trades_csv: Path,
+    df: pd.DataFrame,
+) -> tuple[list[str], list[str], dict[str, Any] | None]:
+    """
+    Refresh the 03c profile JSON when needed and return the feature lists.
+
+    The profile is regenerated when the JSON file does not exist or is older
+    (by modification time) than the trades CSV. Regeneration also rewrites the
+    HTML report. Empty feature lists fall back to BUY_PROFILE_FEATURES /
+    SELL_PROFILE_FEATURES.
+
+    Args:
+        trades_csv: Path of the 03b CSV.
+        df: DataFrame loaded from that same CSV.
+
+    Returns:
+        (buy_features, sell_features, analysis) where ``analysis`` is the
+        freshly computed profile, or None when the existing JSON was reused.
+    """
+    profile_path = ANALYSIS_GT_MTF_PROFILE_JSON
+    # Short-circuit keeps stat() from being called on a missing profile file.
+    stale = (not profile_path.is_file()) or (
+        profile_path.stat().st_mtime < trades_csv.stat().st_mtime
+    )
+    analysis: dict[str, Any] | None = None
+    if stale:
+        analysis = analyze_gt_mtf_profile(df)
+        profile_path.parent.mkdir(parents=True, exist_ok=True)
+        serialized = json.dumps(analysis, ensure_ascii=False, indent=2)
+        profile_path.write_text(serialized, encoding="utf-8")
+        # Imported here, as in the original layout, only on the refresh path.
+        from deepcoin.matching.gt_mtf_profile import write_gt_mtf_profile_html
+        from deepcoin.paths import ANALYSIS_GT_MTF_PROFILE_HTML
+
+        write_gt_mtf_profile_html(analysis, ANALYSIS_GT_MTF_PROFILE_HTML)
+        print(f"[04-1] 03c GT MTF 프로필 갱신: {profile_path}")
+    selected_buy, selected_sell = load_selected_features(profile_path)
+    # Fall back to the static defaults when the profile selected nothing.
+    buy_features = selected_buy if selected_buy else list(BUY_PROFILE_FEATURES)
+    sell_features = selected_sell if selected_sell else list(SELL_PROFILE_FEATURES)
+    return buy_features, sell_features, analysis
+
+
+def _mtf_cross_conditions(
+    buy: pd.DataFrame,
+    sell: pd.DataFrame,
+    features: list[str],
+    side: str,
+) -> list[dict[str, Any]]:
+    """
+    Collect one condition per timeframe from its best-separating column.
+
+    For every interval in GENERAL_ANALYSIS_INTERVALS, the candidate features
+    carrying that interval's prefix are ranked by buy/sell separation and the
+    top one is turned into a condition; the results are meant to be ANDed
+    into a cross-timeframe compound rule.
+
+    Args:
+        buy: Buy GT rows.
+        sell: Sell GT rows.
+        features: Candidate column names.
+        side: "buy" or "sell"; selects which frame the conditions come from.
+
+    Returns:
+        List of conditions, at most one per interval (may be empty).
+    """
+    frame = sell
+    if side == "buy":
+        frame = buy
+
+    def separation(name: str) -> float:
+        return _feature_separation(buy, sell, name)
+
+    picked: list[dict[str, Any]] = []
+    for interval in GENERAL_ANALYSIS_INTERVALS:
+        prefix = f"{interval_tf_prefix(interval)}_"
+        candidates = [
+            name
+            for name in features
+            if name.startswith(prefix) and name in frame.columns
+        ]
+        if candidates:
+            top = max(candidates, key=separation)
+            condition = _condition_from_series(frame[top], side)
+            if condition:
+                picked.append(condition)
+    return picked
+
+
+def _merge_calibration_rules(
+    rules: list[dict[str, Any]],
+    cal: dict[str, Any],
+) -> list[dict[str, Any]]:
+    """
+    Combine generated rules with the rules stored in the calibration JSON.
+
+    When the calibration reports ``final.targets_met`` and provides rules, the
+    calibrated rules replace the generated ones entirely. Otherwise calibrated
+    rules are appended, skipping any whose ``rule_id`` already exists. Every
+    calibrated rule gets ``logic="and"`` when it has no ``logic`` key.
+
+    Args:
+        rules: Rules generated from the GT profile.
+        cal: Parsed calibration JSON.
+
+    Returns:
+        The resulting rule list (a new list; ``rules`` is not modified).
+    """
+    cal_rules = cal.get("calibrated_rules") or []
+    # ``final`` may be absent or null in the JSON; treat both as "not met".
+    targets_met = (cal.get("final") or {}).get("targets_met")
+    if targets_met and cal_rules:
+        replaced = []
+        for cr in cal_rules:
+            cr.setdefault("logic", "and")
+            replaced.append(cr)
+        print(f"[04-1] 캘리브레이션 규칙 적용(90% 달성) → {len(replaced)}개")
+        return replaced
+
+    merged = list(rules)
+    seen_ids = {r["rule_id"] for r in merged if "rule_id" in r}
+    for cr in cal_rules:
+        rule_id = cr.get("rule_id")
+        # De-duplicate by id only when an id exists; a calibrated rule without
+        # ``rule_id`` is kept rather than crashing the merge with KeyError.
+        if rule_id is not None and rule_id in seen_ids:
+            continue
+        cr.setdefault("logic", "and")
+        merged.append(cr)
+        if rule_id is not None:
+            seen_ids.add(rule_id)
+    print(f"[04-1] 캘리브레이션 규칙 병합 → 총 {len(merged)}개")
+    return merged
+
+
+def build_rule_candidates(
+    trades_csv: Path | None = None,
+) -> dict[str, Any]:
+    """
+    Generate buy/sell rule candidates from the 03b CSV and the 03c MTF profile.
+
+    Per side this emits, depending on the MATCH_INCLUDE_* switches and on the
+    data: atomic single-feature rules, a compound of the top-3 separating
+    features, a tight-band compound of the top-4, a contrast rule on the best
+    feature, a cross-timeframe rule, and wide (q10..q90) rules for the top-2.
+    If the calibration JSON exists its rules are then merged in, or replace
+    the generated ones when calibration reports its targets as met.
+
+    Args:
+        trades_csv: Path of general_analysis_trades.csv; defaults to
+            ANALYSIS_TRADES_CSV.
+
+    Returns:
+        Dict with source/profile/calibration paths, the first 50 buy and sell
+        profile features, GT row counts, ``rule_count`` and ``rules``.
+
+    Raises:
+        FileNotFoundError: If the trades CSV does not exist.
+        KeyError: If the CSV has no ``action`` column.
+    """
+    path = trades_csv or ANALYSIS_TRADES_CSV
+    if not path.is_file():
+        raise FileNotFoundError(f"03b CSV 없음: {path} — scripts/03_analyze_trades.py 먼저 실행")
+
+    df = pd.read_csv(path)
+    buy = df[df["action"] == "buy"].copy()
+    sell = df[df["action"] == "sell"].copy()
+    buy_features, sell_features, _ = _resolve_profile_features(path, df)
+    rules: list[dict[str, Any]] = []
+    rid = 0  # running counter shared by both sides for atomic rule ids
+
+    # Columns excluded from atomic rules only.
+    skip_cols = {
+        "ga_align_trend_score",  # distribution is so wide it fires almost everywhere
+    }
+
+    for side, subset, features in (
+        ("buy", buy, buy_features),
+        ("sell", sell, sell_features),
+    ):
+        if MATCH_INCLUDE_ATOMIC:
+            for feat in features:
+                if feat not in df.columns or feat in skip_cols:
+                    continue
+                cond = _condition_from_series(subset[feat], side)
+                if cond is None:
+                    continue
+                rules.append(
+                    {
+                        "rule_id": f"{side}_a{rid:03d}_{feat}",
+                        "side": side,
+                        "kind": "atomic",
+                        "conditions": [cond],
+                        "profile_col": feat,
+                    }
+                )
+                rid += 1
+
+        # Rank this side's available features by buy/sell separation, best first.
+        ranked = sorted(
+            [f for f in features if f in df.columns],
+            key=lambda c: _feature_separation(buy, sell, c),
+            reverse=True,
+        )
+        ranked_top = ranked[:5]
+
+        # Compound rule: AND of the regular conditions of the top-3 features.
+        compound_conds: list[dict[str, Any]] = []
+        for feat in ranked_top[:3]:
+            cond = _condition_from_series(subset[feat], side)
+            if cond:
+                compound_conds.append(cond)
+        if len(compound_conds) >= 2:
+            rules.append(
+                {
+                    "rule_id": f"{side}_compound_top3",
+                    "side": side,
+                    "kind": "compound",
+                    "conditions": compound_conds,
+                    "profile_cols": ranked_top[:3],
+                }
+            )
+
+        # Tight compound rule: AND of the narrow bands of the top-4 features.
+        tight_conds: list[dict[str, Any]] = []
+        for feat in ranked_top[:4]:
+            if feat not in subset.columns:
+                continue
+            tc = _condition_tight(subset[feat])
+            if tc:
+                tight_conds.append(tc)
+        if len(tight_conds) >= 2:
+            rules.append(
+                {
+                    "rule_id": f"{side}_compound_tight",
+                    "side": side,
+                    "kind": "compound_tight",
+                    "conditions": tight_conds,
+                }
+            )
+
+        # Contrast rule on the single best-separating feature.
+        if ranked_top:
+            c0 = ranked_top[0]
+            contrast = _contrast_conditions(buy, sell, c0, side)
+            if len(contrast) >= 2:
+                rules.append(
+                    {
+                        "rule_id": f"{side}_contrast_{c0}",
+                        "side": side,
+                        "kind": "contrast",
+                        "conditions": contrast,
+                    }
+                )
+
+        # Cross-timeframe rule: needs conditions from at least 3 timeframes.
+        if MATCH_INCLUDE_MTF_CROSS:
+            cross = _mtf_cross_conditions(buy, sell, features, side)
+            if len(cross) >= 3:
+                rules.append(
+                    {
+                        "rule_id": f"{side}_mtf_cross_all_tf",
+                        "side": side,
+                        "kind": "mtf_cross",
+                        "conditions": cross,
+                    }
+                )
+
+        # Wide rules: q10..q90 band for each of the top-2 features.
+        if MATCH_INCLUDE_WIDE_RULES:
+            for feat in ranked_top[:2]:
+                if feat not in subset.columns:
+                    continue
+                s = pd.to_numeric(subset[feat], errors="coerce").dropna()
+                if len(s) < 10:
+                    continue
+                lo, hi = float(s.quantile(0.10)), float(s.quantile(0.90))
+                if lo < hi:
+                    rules.append(
+                        {
+                            "rule_id": f"{side}_wide_{feat}",
+                            "side": side,
+                            "kind": "wide",
+                            "conditions": [
+                                {"col": feat, "op": "between", "lo": lo, "hi": hi}
+                            ],
+                        }
+                    )
+
+    if ANALYSIS_GT_CALIBRATION_JSON.is_file():
+        cal = json.loads(ANALYSIS_GT_CALIBRATION_JSON.read_text(encoding="utf-8"))
+        rules = _merge_calibration_rules(rules, cal)
+
+    out = {
+        "source": str(path),
+        "profile_json": str(ANALYSIS_GT_MTF_PROFILE_JSON),
+        "calibration_json": str(ANALYSIS_GT_CALIBRATION_JSON),
+        "buy_profile_features": buy_features[:50],
+        "sell_profile_features": sell_features[:50],
+        "buy_gt_count": int(len(buy)),
+        "sell_gt_count": int(len(sell)),
+        "rule_count": len(rules),
+        "rules": rules,
+    }
+    print(
+        f"[04-1] 규칙 후보 {len(rules)}개 "
+        f"(매수 GT {len(buy)}, 매도 GT {len(sell)})"
+    )
+    return out
+
+
+def save_rule_candidates(
+    data: dict[str, Any],
+    out_path: Path,
+) -> Path:
+    """
+    Write the rule candidates to a JSON file (rule_candidates.json).
+
+    The parent directory is created if needed; the file is written as
+    UTF-8, indented JSON with non-ASCII characters left unescaped.
+
+    Args:
+        data: Result of build_rule_candidates.
+        out_path: Destination file path.
+
+    Returns:
+        ``out_path``, unchanged.
+    """
+    target_dir = out_path.parent
+    target_dir.mkdir(parents=True, exist_ok=True)
+    serialized = json.dumps(data, ensure_ascii=False, indent=2)
+    out_path.write_text(serialized, encoding="utf-8")
+    print(f"[04-1] 저장: {out_path}")
+    return out_path