GT MTF 프로필·캘리브레이션과 04 매칭/시뮬/실거래 파이프라인을 추가한다.

3분~일봉 GT 타점 분석(03c), leg 체결 순서 수정, 총자산 90% 검증 루프,
walk-forward Go/No-Go 시뮬, monitor·live_trader 및 reference 문서를 포함한다.

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
2026-05-31 11:27:50 +09:00
parent b52d61b777
commit 2cb67c42b3
47 changed files with 5956 additions and 209 deletions

View File

@@ -0,0 +1,514 @@
"""
GT 매수/매도 타점 MTF 프로필 분석 (3분~일봉 전 TF).
03b wide CSV에서 간격별·기법별 분포를 비교하고,
04 규칙 후보 생성용 피처 목록을 산출합니다.
"""
from __future__ import annotations
import json
from pathlib import Path
from typing import Any
import numpy as np
import pandas as pd
from config import (
GENERAL_ANALYSIS_INTERVALS,
MATCH_PROFILE_MIN_SAMPLES,
MATCH_PROFILE_MIN_SEPARATION,
MATCH_PROFILE_TOP_GLOBAL,
MATCH_PROFILE_TOP_PER_TF,
)
from deepcoin.analysis.general_analysis_config import INTERVAL_PREFIX
from deepcoin.analysis.general_analysis_core import interval_tf_prefix
from deepcoin.matching.config import ANALYSIS_TRADES_CSV, META_COLS
from deepcoin.paths import ANALYSIS_GT_MTF_PROFILE_HTML, ANALYSIS_GT_MTF_PROFILE_JSON
def _feature_separation(
    buy: pd.Series,
    sell: pd.Series,
) -> float:
    """
    Measure how far apart the buy and sell distributions of one feature are.

    The score is a Cohen's-d-like effect size: the absolute difference of the
    two means divided by ``sqrt((var_buy + var_sell) / 2)``.

    Args:
        buy: Feature values observed at buy points.
        sell: Feature values observed at sell points.

    Returns:
        The separation score. ``0.0`` when either side has fewer than
        ``MATCH_PROFILE_MIN_SAMPLES`` numeric values, or when the score cannot
        be computed as a finite number. When the pooled deviation is below
        ``1e-9`` the unscaled absolute mean difference is returned instead.
    """
    a = pd.to_numeric(buy, errors="coerce").dropna()
    b = pd.to_numeric(sell, errors="coerce").dropna()
    if len(a) < MATCH_PROFILE_MIN_SAMPLES or len(b) < MATCH_PROFILE_MIN_SAMPLES:
        return 0.0
    mean_gap = abs(float(a.mean() - b.mean()))
    pooled = float(np.sqrt((a.var() + b.var()) / 2))
    # Series.var() is NaN for a single observation, and inf values turn both
    # statistics into NaN/inf. A non-finite score would make the separation
    # ranking order-dependent and serialize as the invalid JSON literal NaN.
    if not (np.isfinite(mean_gap) and np.isfinite(pooled)):
        return 0.0
    if pooled < 1e-9:
        # Both sides are (near) constant: avoid dividing by ~0.
        return mean_gap
    return mean_gap / pooled
def _numeric_stats(series: pd.Series) -> dict[str, float | int]:
"""
숫자 컬럼 요약 통계.
Args:
series: 한 side GT 값.
Returns:
count, mean, median, q25, q75, std.
"""
s = pd.to_numeric(series, errors="coerce").dropna()
if s.empty:
return {"count": 0}
return {
"count": int(len(s)),
"mean": round(float(s.mean()), 4),
"median": round(float(s.median()), 4),
"q25": round(float(s.quantile(0.25)), 4),
"q75": round(float(s.quantile(0.75)), 4),
"std": round(float(s.std()), 4) if len(s) > 1 else 0.0,
}
def _categorical_stats(series: pd.Series) -> dict[str, Any]:
"""
범주형 컬럼 최빈값·비율.
Args:
series: GT 값.
Returns:
mode, mode_frac, value_counts 상위 5.
"""
s = series.dropna().astype(str)
if s.empty:
return {"count": 0}
vc = s.value_counts()
mode = str(vc.index[0])
return {
"count": int(len(s)),
"mode": mode,
"mode_frac": round(float(vc.iloc[0] / len(s)), 3),
"top": {str(k): int(v) for k, v in vc.head(5).items()},
}
def _parse_tf_column(col: str) -> tuple[str, int | None, str]:
"""
컬럼명에서 TF 접두사·간격·베이스명 추출.
Args:
col: 예 m3_ga_rsi, ga_align_timing_buy_score.
Returns:
(tf_label, interval_minutes|None, base_name).
"""
if col.startswith("ga_align_"):
return ("mtf_align", None, col)
prefixes = sorted(
set(INTERVAL_PREFIX.values()),
key=len,
reverse=True,
)
for p in prefixes:
if col.startswith(f"{p}_"):
inv = {v: k for k, v in INTERVAL_PREFIX.items()}
return (p, inv.get(p), col[len(p) + 1 :])
return ("other", None, col)
def _feature_family(base: str) -> str:
"""기법군 라벨."""
if base in ("bb_pos", "RSI", "macd_hist", "stoch_k", "stoch_d", "BB_Width"):
return "legacy"
if base.startswith("ga_align_"):
return "mtf_align"
if "pattern" in base:
return "pattern"
if "struct" in base or "elliott" in base or "wyckoff" in base or "fib_" in base:
return "wave_structure"
if "chart" in base:
return "chart"
if "volume" in base or "vp_" in base:
return "volume"
if "harmonic" in base:
return "harmonic"
if base.startswith("ga_"):
return "indicator"
return "other"
def discover_profile_columns(df: pd.DataFrame) -> list[str]:
    """
    List the columns worth including in the buy/sell profile analysis.

    A column qualifies when it is not in ``META_COLS``, has at least
    ``MATCH_PROFILE_MIN_SAMPLES`` non-null values, and is either numeric or
    has between 2 and 20 distinct non-null values when read as strings.

    Args:
        df: The wide trades DataFrame.

    Returns:
        Qualifying column names in DataFrame order.
    """
    excluded = set(META_COLS)
    selected: list[str] = []
    for name in df.columns:
        if name in excluded:
            continue
        column = df[name]
        if column.notna().sum() < MATCH_PROFILE_MIN_SAMPLES:
            continue
        if pd.api.types.is_numeric_dtype(column):
            selected.append(name)
        elif 1 < column.dropna().astype(str).nunique() <= 20:
            # Low-cardinality non-numeric column: treated as categorical.
            selected.append(name)
    return selected
def _analyze_one_column(
    buy: pd.DataFrame,
    sell: pd.DataFrame,
    col: str,
) -> dict[str, Any]:
    """
    Compare buy rows against sell rows for a single column.

    Args:
        buy: Rows of the buy side.
        sell: Rows of the sell side.
        col: Column to compare.

    Returns:
        A dict with ``col``, ``tf``, ``interval``, ``base``, ``family``,
        ``dtype``, per-side ``buy``/``sell`` statistics and ``separation``.
        Numeric columns additionally carry ``buy_lower_than_sell`` (median
        comparison, ``None`` if a median is missing). Categorical columns get
        ``separation`` 0.0 and, when both sides have a mode, ``modes_differ``.
    """
    tf_label, interval, base = _parse_tf_column(col)
    # The dtype is decided from the buy-side column.
    is_numeric = pd.api.types.is_numeric_dtype(buy[col])
    result: dict[str, Any] = {
        "col": col,
        "tf": tf_label,
        "interval": interval,
        "base": base,
        "family": _feature_family(base),
        "dtype": "numeric" if is_numeric else "categorical",
    }

    if not is_numeric:
        buy_stats = _categorical_stats(buy[col])
        sell_stats = _categorical_stats(sell[col])
        result["buy"] = buy_stats
        result["sell"] = sell_stats
        result["separation"] = 0.0
        if buy_stats.get("mode") and sell_stats.get("mode"):
            result["modes_differ"] = buy_stats["mode"] != sell_stats["mode"]
        return result

    result["buy"] = _numeric_stats(buy[col])
    result["sell"] = _numeric_stats(sell[col])
    result["separation"] = round(_feature_separation(buy[col], sell[col]), 4)
    buy_median = result["buy"].get("median")
    sell_median = result["sell"].get("median")
    if buy_median is None or sell_median is None:
        result["buy_lower_than_sell"] = None
    else:
        result["buy_lower_than_sell"] = buy_median < sell_median
    return result
def analyze_gt_mtf_profile(df: pd.DataFrame) -> dict[str, Any]:
    """
    Build the buy-vs-sell profile over every analyzable column.

    Rows are split on the ``action`` column (``"buy"`` / ``"sell"``), each
    column from ``discover_profile_columns`` is compared across the two sides,
    and numeric features are ranked by separation globally and per interval in
    ``GENERAL_ANALYSIS_INTERVALS``.

    Args:
        df: The wide trades DataFrame; must contain an ``action`` column.

    Returns:
        A dict with row counts, the config thresholds used, the global top 40
        by separation, a per-interval summary, the ``mtf_align`` family
        features, the selected buy/sell feature names and all per-column
        results.
    """
    actions = df["action"]
    buy = df[actions == "buy"].copy()
    sell = df[actions == "sell"].copy()

    cols = discover_profile_columns(df)
    features: list[dict[str, Any]] = [
        _analyze_one_column(buy, sell, name) for name in cols
    ]
    numeric_feats = [feat for feat in features if feat["dtype"] == "numeric"]

    def by_separation(items: Any) -> list[dict[str, Any]]:
        """Return items ordered by separation, largest first."""
        return sorted(items, key=lambda feat: feat["separation"], reverse=True)

    def brief(items: list[dict[str, Any]]) -> list[dict[str, Any]]:
        """Reduce feature dicts to their column name and separation."""
        return [
            {"col": feat["col"], "separation": feat["separation"]}
            for feat in items
        ]

    ranked = by_separation(numeric_feats)

    by_interval: dict[str, dict[str, Any]] = {}
    for iv in GENERAL_ANALYSIS_INTERVALS:
        pfx = interval_tf_prefix(iv)
        iv_ranked = by_separation(
            feat for feat in numeric_feats if feat["tf"] == pfx
        )
        lower_on_buy = [
            feat for feat in iv_ranked if feat.get("buy_lower_than_sell") is True
        ]
        higher_on_buy = [
            feat for feat in iv_ranked if feat.get("buy_lower_than_sell") is False
        ]
        by_interval[pfx] = {
            "interval_minutes": iv,
            "feature_count": len(iv_ranked),
            "top_separation": brief(iv_ranked[:15]),
            "buy_favor_lower_median": brief(lower_on_buy[:8]),
            "sell_favor_higher_median": brief(higher_on_buy[:8]),
        }

    align_feats = [feat for feat in features if feat["family"] == "mtf_align"]
    selected_buy = _select_side_features(ranked, "buy")
    selected_sell = _select_side_features(ranked, "sell")

    global_top = [
        {
            "col": feat["col"],
            "tf": feat["tf"],
            "family": feat["family"],
            "separation": feat["separation"],
            "buy_median": feat["buy"].get("median"),
            "sell_median": feat["sell"].get("median"),
        }
        for feat in ranked[:40]
    ]

    return {
        "source_rows": int(len(df)),
        "buy_gt_count": int(len(buy)),
        "sell_gt_count": int(len(sell)),
        "columns_analyzed": len(cols),
        "intervals": list(GENERAL_ANALYSIS_INTERVALS),
        "config": {
            "top_per_tf": MATCH_PROFILE_TOP_PER_TF,
            "top_global": MATCH_PROFILE_TOP_GLOBAL,
            "min_separation": MATCH_PROFILE_MIN_SEPARATION,
            "min_samples": MATCH_PROFILE_MIN_SAMPLES,
        },
        "global_top_separation": global_top,
        "by_interval": by_interval,
        "mtf_align": align_feats,
        "selected_features": {
            "buy": selected_buy,
            "sell": selected_sell,
        },
        "features": features,
    }
def _select_side_features(
    ranked: list[dict[str, Any]],
    side: str,
) -> list[str]:
    """
    Pick the feature columns offered to rule generation for one side.

    The result concatenates, in this order: the top
    ``MATCH_PROFILE_TOP_PER_TF`` features of every interval in
    ``GENERAL_ANALYSIS_INTERVALS``, the top ``MATCH_PROFILE_TOP_GLOBAL``
    features overall, and six fixed ``ga_align_*`` names that are always
    appended. Only features with separation at least
    ``MATCH_PROFILE_MIN_SEPARATION`` compete for the "top" slots.

    Args:
        ranked: Numeric feature dicts, ordered by separation descending.
        side: ``"buy"``; any other value is treated as the sell side.

    Returns:
        Column names with duplicates removed, first occurrence kept.
    """
    if side == "buy":

        def favors_side(feat: dict[str, Any]) -> int:
            return 1 if feat.get("buy_lower_than_sell") else 0

    else:

        def favors_side(feat: dict[str, Any]) -> int:
            return 1 if feat.get("buy_lower_than_sell") is False else 0

    def top_for_side(
        candidates: list[dict[str, Any]],
        limit: int,
    ) -> list[dict[str, Any]]:
        """Order by separation, using the side's median direction as tie-breaker."""
        ordered = sorted(
            candidates,
            key=lambda feat: (feat["separation"], favors_side(feat)),
            reverse=True,
        )
        return ordered[:limit]

    eligible = [
        feat for feat in ranked if feat["separation"] >= MATCH_PROFILE_MIN_SEPARATION
    ]

    picked: list[str] = []
    for iv in GENERAL_ANALYSIS_INTERVALS:
        pfx = interval_tf_prefix(iv)
        in_tf = [feat for feat in eligible if feat["tf"] == pfx]
        picked.extend(
            feat["col"] for feat in top_for_side(in_tf, MATCH_PROFILE_TOP_PER_TF)
        )

    picked.extend(
        feat["col"] for feat in top_for_side(eligible, MATCH_PROFILE_TOP_GLOBAL)
    )

    # These alignment columns are appended regardless of their ranking.
    picked.extend(
        (
            "ga_align_timing_buy_score",
            "ga_align_timing_sell_score",
            "ga_align_trend_score",
            "ga_align_rsi_oversold_tf",
            "ga_align_rsi_overbought_tf",
            "ga_align_mtf_conflict",
        )
    )

    # dict.fromkeys drops repeats while keeping first-seen order.
    return list(dict.fromkeys(picked))
def load_selected_features(
profile_path: Path | None = None,
) -> tuple[list[str], list[str]]:
"""
저장된 프로필 JSON에서 buy/sell 피처 목록 로드.
Args:
profile_path: gt_mtf_profile.json.
Returns:
(buy_features, sell_features). 없으면 빈 리스트.
"""
path = profile_path or ANALYSIS_GT_MTF_PROFILE_JSON
if not path.is_file():
return [], []
data = json.loads(path.read_text(encoding="utf-8"))
sel = data.get("selected_features") or {}
return list(sel.get("buy") or []), list(sel.get("sell") or [])
def run_gt_mtf_profile(
    trades_csv: Path | None = None,
    *,
    write_json: bool = True,
    write_html: bool = True,
) -> dict[str, Any]:
    """
    Analyze the trades CSV and optionally persist the result as JSON and HTML.

    Args:
        trades_csv: Input CSV; falls back to ``ANALYSIS_TRADES_CSV``.
        write_json: Write the analysis to ``ANALYSIS_GT_MTF_PROFILE_JSON``.
        write_html: Write the summary page to ``ANALYSIS_GT_MTF_PROFILE_HTML``.

    Returns:
        The result of ``analyze_gt_mtf_profile``.

    Raises:
        FileNotFoundError: The input CSV does not exist.
    """
    path = trades_csv if trades_csv else ANALYSIS_TRADES_CSV
    if not path.is_file():
        raise FileNotFoundError(f"03b CSV 없음: {path}")

    analysis = analyze_gt_mtf_profile(pd.read_csv(path))
    selected = analysis["selected_features"]
    buy_n, sell_n = len(selected["buy"]), len(selected["sell"])
    print(
        f"[03c] GT MTF 프로필: 분석 {analysis['columns_analyzed']}"
        f"→ 매수 피처 {buy_n}, 매도 피처 {sell_n}"
    )

    if write_json:
        json_path = ANALYSIS_GT_MTF_PROFILE_JSON
        json_path.parent.mkdir(parents=True, exist_ok=True)
        serialized = json.dumps(analysis, ensure_ascii=False, indent=2)
        json_path.write_text(serialized, encoding="utf-8")
        print(f"[03c] 저장: {ANALYSIS_GT_MTF_PROFILE_JSON}")

    if write_html:
        write_gt_mtf_profile_html(analysis, ANALYSIS_GT_MTF_PROFILE_HTML)
        print(f"[03c] 저장: {ANALYSIS_GT_MTF_PROFILE_HTML}")

    return analysis
def write_gt_mtf_profile_html(
    analysis: dict[str, Any],
    html_path: Path,
) -> Path:
    """
    Render the per-interval and global separation summary as an HTML page.

    Every value taken from ``analysis`` is HTML-escaped before insertion, so
    column names or labels containing ``<``, ``>``, ``&`` or quotes cannot
    break the markup. The parent directory is created if needed and the file
    is written as UTF-8.

    Args:
        analysis: Result of ``analyze_gt_mtf_profile``. ``buy_gt_count``,
            ``sell_gt_count``, ``columns_analyzed`` and ``selected_features``
            are required; ``by_interval`` and ``global_top_separation`` are
            optional.
        html_path: Output file path.

    Returns:
        ``html_path``.
    """
    # Imported locally because the name ``html`` is not otherwise in scope here.
    from html import escape

    def esc(value: Any) -> str:
        """Stringify and HTML-escape a value for use as element text."""
        return escape(str(value))

    html_path.parent.mkdir(parents=True, exist_ok=True)

    interval_rows: list[str] = []
    for pfx, block in analysis.get("by_interval", {}).items():
        top = block.get("top_separation") or []
        # Show the name after the first underscore, truncated to 20 characters.
        top_s = ", ".join(
            f"{esc(t['col'].split('_', 1)[-1][:20])}({t['separation']:.2f})"
            for t in top[:5]
        ) or "-"
        interval_rows.append(
            f"<tr><td>{esc(pfx)}</td><td>{esc(block.get('feature_count', 0))}</td>"
            f"<td>{top_s}</td></tr>"
        )
    rows_interval = "".join(interval_rows)

    rows_global = "".join(
        f"<tr><td>{esc(item['col'])}</td><td>{esc(item['tf'])}</td>"
        f"<td>{esc(item['family'])}</td><td>{item['separation']:.3f}</td>"
        f"<td>{esc(item.get('buy_median', ''))}</td>"
        f"<td>{esc(item.get('sell_median', ''))}</td></tr>"
        for item in analysis.get("global_top_separation") or []
    )

    selected = analysis["selected_features"]
    buy_feats = esc(", ".join(selected["buy"][:25]))
    sell_feats = esc(", ".join(selected["sell"][:25]))
    buy_count = esc(analysis["buy_gt_count"])
    sell_count = esc(analysis["sell_gt_count"])
    column_count = esc(analysis["columns_analyzed"])

    document = f"""<!DOCTYPE html>
<html lang="ko"><head><meta charset="utf-8"/>
<title>GT MTF 프로필 (3분~일봉)</title>
<style>
body {{ font-family: "Malgun Gothic", Arial, sans-serif; margin: 24px; background: #f5f5f5; color: #1e293b; }}
h1, h2 {{ color: #0f172a; }}
table {{ border-collapse: collapse; width: 100%; background: #fff; margin-bottom: 20px; font-size: 0.85rem; }}
th, td {{ border: 1px solid #e2e8f0; padding: 8px; text-align: left; }}
th {{ background: #e2e8f0; }}
p.note {{ font-size: 0.9rem; color: #475569; }}
code {{ font-size: 0.8rem; word-break: break-all; }}
</style></head><body>
<h1>Ground Truth MTF 타점 프로필</h1>
<p>매수 GT {buy_count}건 · 매도 GT {sell_count}건 ·
분석 컬럼 {column_count}개 (3,5,10,15,30,60,240,1440분 + MTF 합성)</p>
<p class="note">분리도 = |mean_buy - mean_sell| / pooled_std. TF별·글로벌 상위 피처로 04 규칙 후보를 생성합니다.</p>
<h2>간격별 분리도 상위 (요약)</h2>
<table><thead><tr><th>TF</th><th>숫자 피처 수</th><th>상위 5 (분리도)</th></tr></thead>
<tbody>{rows_interval}</tbody></table>
<h2>글로벌 분리도 Top 40</h2>
<table><thead><tr><th>컬럼</th><th>TF</th><th>기법군</th><th>분리도</th><th>매수 median</th><th>매도 median</th></tr></thead>
<tbody>{rows_global}</tbody></table>
<h2>04 규칙 선별용 피처 (발췌)</h2>
<p><strong>매수</strong><br/><code>{buy_feats}</code></p>
<p><strong>매도</strong><br/><code>{sell_feats}</code></p>
</body></html>"""
    html_path.write_text(document, encoding="utf-8")
    return html_path