3분~일봉 GT 타점 분석(03c), leg 체결 순서 수정, 총자산 90% 검증 루프, walk-forward Go/No-Go 시뮬, monitor·live_trader 및 reference 문서를 포함한다. Co-authored-by: Cursor <cursoragent@cursor.com>
419 lines
13 KiB
Python
419 lines
13 KiB
Python
"""
|
|
04-1: GT 스냅샷(03b)에서 규칙 후보 생성.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
from pathlib import Path
|
|
from typing import Any
|
|
|
|
import numpy as np
|
|
import pandas as pd
|
|
|
|
from config import (
|
|
MATCH_INCLUDE_ATOMIC,
|
|
MATCH_INCLUDE_MTF_CROSS,
|
|
MATCH_INCLUDE_WIDE_RULES,
|
|
MATCH_PROFILE_QUANTILE_HI,
|
|
MATCH_PROFILE_QUANTILE_LO,
|
|
MATCH_PROFILE_TIGHT_HI,
|
|
MATCH_PROFILE_TIGHT_LO,
|
|
)
|
|
from deepcoin.analysis.general_analysis_config import GENERAL_ANALYSIS_INTERVALS
|
|
from deepcoin.analysis.general_analysis_core import interval_tf_prefix
|
|
from deepcoin.matching.config import (
|
|
ANALYSIS_TRADES_CSV,
|
|
BUY_PROFILE_FEATURES,
|
|
SELL_PROFILE_FEATURES,
|
|
)
|
|
from deepcoin.matching.gt_mtf_profile import (
|
|
analyze_gt_mtf_profile,
|
|
load_selected_features,
|
|
)
|
|
from deepcoin.paths import (
|
|
ANALYSIS_GT_CALIBRATION_JSON,
|
|
ANALYSIS_GT_MTF_PROFILE_JSON,
|
|
)
|
|
|
|
|
|
def _feature_separation(
    buy: pd.DataFrame,
    sell: pd.DataFrame,
    col: str,
) -> float:
    """
    Score how far apart the buy and sell GT distributions of one column are.

    The score is the absolute difference of the two means divided by the
    pooled standard deviation ``sqrt((var_buy + var_sell) / 2)``.

    Args:
        buy: Rows of buy entry points.
        sell: Rows of sell entry points.
        col: Name of the column to score.

    Returns:
        The separation score as a plain float. ``0.0`` is returned when the
        column is missing from either frame, is not numeric in ``buy``, or
        either side has fewer than five usable values. When the pooled
        deviation is effectively zero, the raw absolute mean difference is
        returned instead of dividing by it.
    """
    # Both frames must carry the column. Checking only ``buy`` would let a
    # missing ``sell`` column surface as a KeyError a few lines below.
    if col not in buy.columns or col not in sell.columns:
        return 0.0
    if not pd.api.types.is_numeric_dtype(buy[col]):
        return 0.0

    buy_vals = pd.to_numeric(buy[col], errors="coerce").dropna()
    sell_vals = pd.to_numeric(sell[col], errors="coerce").dropna()
    if len(buy_vals) < 5 or len(sell_vals) < 5:
        return 0.0

    mean_gap = abs(float(buy_vals.mean() - sell_vals.mean()))
    pooled = float(np.sqrt((buy_vals.var() + sell_vals.var()) / 2))
    if pooled < 1e-9:
        # Both sides are (nearly) constant: dividing would blow up, so the
        # unscaled gap is used as the score.
        return mean_gap
    return mean_gap / pooled
|
|
|
|
|
|
def _condition_from_series(series: pd.Series, side: str) -> dict[str, Any] | None:
    """
    Derive a single rule condition from the GT distribution of one column.

    Args:
        series: Values of the column at the entry points of one side.
        side: ``"buy"`` or ``"sell"``; accepted for descriptive purposes only
            and not used in the computation.

    Returns:
        A condition dict, or ``None`` when no condition can be derived.
    """
    name = series.name
    dtype = series.dtype

    # Text columns: match the most frequent value.
    if dtype == object or dtype.name == "string":
        modes = series.dropna().astype(str).mode()
        if modes.empty:
            return None
        return {"col": name, "op": "eq", "value": str(modes.iloc[0])}

    numeric = pd.to_numeric(series, errors="coerce").dropna()
    if len(numeric) < 10:
        return None

    # 0/1 flag columns: require a clear majority, otherwise emit nothing.
    if set(numeric.unique()) <= {0, 1}:
        share = float(numeric.mean())
        if share >= 0.55:
            flag = 1
        elif share <= 0.45:
            flag = 0
        else:
            return None
        return {"col": name, "op": "eq_int", "value": flag}

    # Continuous columns: band between the configured lower/upper quantiles.
    lower = float(numeric.quantile(MATCH_PROFILE_QUANTILE_LO))
    upper = float(numeric.quantile(MATCH_PROFILE_QUANTILE_HI))
    if lower >= upper:
        return None
    return {"col": name, "op": "between", "lo": lower, "hi": upper}
|
|
|
|
|
|
def _condition_tight(series: pd.Series) -> dict[str, Any] | None:
    """
    Build a narrow ``between`` condition from a column's central quantile band.

    The band runs from the ``MATCH_PROFILE_TIGHT_LO`` quantile to the
    ``MATCH_PROFILE_TIGHT_HI`` quantile. The original note describes this as
    q35-q65; the actual bounds come from config - TODO confirm.

    Args:
        series: Values of the GT subset for one column.

    Returns:
        A ``between`` condition dict, or ``None`` when fewer than ten numeric
        values remain or the band is empty.
    """
    numeric = pd.to_numeric(series, errors="coerce").dropna()
    if len(numeric) < 10:
        return None

    lower, upper = (
        float(numeric.quantile(q))
        for q in (MATCH_PROFILE_TIGHT_LO, MATCH_PROFILE_TIGHT_HI)
    )
    if lower >= upper:
        return None
    return {"col": series.name, "op": "between", "lo": lower, "hi": upper}
|
|
|
|
|
|
def _contrast_conditions(
    buy: pd.DataFrame,
    sell: pd.DataFrame,
    col: str,
    side: str,
) -> list[dict[str, Any]]:
    """
    Build a tight band plus a contrast bound for a well-separated column.

    The first condition is the tight quantile band of the requested side. A
    second, contrasting condition is added only when the buy median lies
    below the sell median: the buy side is capped at the sell 40th
    percentile, the sell side is floored at the buy 60th percentile.

    Args:
        buy: Buy GT rows.
        sell: Sell GT rows.
        col: Column name.
        side: ``"buy"`` or ``"sell"``.

    Returns:
        A list of condition dicts. It is empty when the column is missing
        from either frame, is not numeric in ``buy``, either side has fewer
        than ten usable values, or no tight band can be derived.
    """
    # Guard both frames; checking only ``buy`` would turn a missing ``sell``
    # column into a KeyError below.
    if col not in buy.columns or col not in sell.columns:
        return []
    if not pd.api.types.is_numeric_dtype(buy[col]):
        return []

    buy_vals = pd.to_numeric(buy[col], errors="coerce").dropna()
    sell_vals = pd.to_numeric(sell[col], errors="coerce").dropna()
    if len(buy_vals) < 10 or len(sell_vals) < 10:
        return []

    tight = _condition_tight(buy_vals if side == "buy" else sell_vals)
    if tight is None:
        return []

    conds = [tight]
    # NOTE(review): only the "buy median below sell median" direction gets a
    # contrast bound; the mirrored case yields the tight band alone. Confirm
    # whether that asymmetry is intended.
    if float(buy_vals.median()) < float(sell_vals.median()):
        if side == "buy":
            conds.append(
                {"col": col, "op": "lte", "value": float(sell_vals.quantile(0.40))}
            )
        elif side == "sell":
            conds.append(
                {"col": col, "op": "gte", "value": float(buy_vals.quantile(0.60))}
            )
    return conds
|
|
|
|
|
|
def _resolve_profile_features(
    trades_csv: Path,
    df: pd.DataFrame,
) -> tuple[list[str], list[str], dict[str, Any] | None]:
    """
    Refresh the 03c profile JSON if needed and return the buy/sell features.

    The profile is regenerated when its JSON file does not exist or is older
    than ``trades_csv``. Regeneration also rewrites the HTML report.

    Args:
        trades_csv: Path of the 03b CSV.
        df: DataFrame loaded from that same CSV.

    Returns:
        ``(buy_features, sell_features, analysis)``. ``analysis`` is the
        freshly computed profile, or ``None`` when the existing JSON was
        reused. An empty feature list falls back to the configured default
        list for that side.
    """
    profile_path = ANALYSIS_GT_MTF_PROFILE_JSON
    # Missing file, or older than the trades CSV it was derived from.
    stale = (
        not profile_path.is_file()
        or profile_path.stat().st_mtime < trades_csv.stat().st_mtime
    )

    analysis: dict[str, Any] | None = None
    if stale:
        analysis = analyze_gt_mtf_profile(df)
        profile_path.parent.mkdir(parents=True, exist_ok=True)
        serialized = json.dumps(analysis, ensure_ascii=False, indent=2)
        profile_path.write_text(serialized, encoding="utf-8")

        # Imported lazily, only on the regeneration path.
        from deepcoin.matching.gt_mtf_profile import write_gt_mtf_profile_html
        from deepcoin.paths import ANALYSIS_GT_MTF_PROFILE_HTML

        write_gt_mtf_profile_html(analysis, ANALYSIS_GT_MTF_PROFILE_HTML)
        print(f"[04-1] 03c GT MTF 프로필 갱신: {profile_path}")

    selected_buy, selected_sell = load_selected_features(profile_path)
    buy_features = selected_buy or list(BUY_PROFILE_FEATURES)
    sell_features = selected_sell or list(SELL_PROFILE_FEATURES)
    return buy_features, sell_features, analysis
|
|
|
|
|
|
def _mtf_cross_conditions(
    buy: pd.DataFrame,
    sell: pd.DataFrame,
    features: list[str],
    side: str,
) -> list[dict[str, Any]]:
    """
    Collect, per timeframe, the condition of its best-separating column.

    For every interval in ``GENERAL_ANALYSIS_INTERVALS`` the candidate
    features carrying that interval's prefix are scored with
    ``_feature_separation``; the top one contributes a condition derived
    from the requested side's values. The caller ANDs the results into a
    cross-timeframe compound rule.

    Args:
        buy: Buy GT rows.
        sell: Sell GT rows.
        features: Candidate column names.
        side: ``"buy"`` or ``"sell"``.

    Returns:
        The list of conditions, at most one per interval.
    """
    side_rows = buy if side == "buy" else sell
    available = side_rows.columns

    def score(column: str) -> float:
        return _feature_separation(buy, sell, column)

    picked: list[dict[str, Any]] = []
    for interval in GENERAL_ANALYSIS_INTERVALS:
        prefix = f"{interval_tf_prefix(interval)}_"
        candidates = [
            name
            for name in features
            if name.startswith(prefix) and name in available
        ]
        if candidates:
            top = max(candidates, key=score)
            condition = _condition_from_series(side_rows[top], side)
            if condition:
                picked.append(condition)
    return picked
|
|
|
|
|
|
def _build_side_rules(
    side: str,
    subset: pd.DataFrame,
    buy: pd.DataFrame,
    sell: pd.DataFrame,
    features: list[str],
    next_rid: int,
) -> tuple[list[dict[str, Any]], int]:
    """
    Generate the heuristic rule candidates for one side.

    Rules are emitted in a fixed order: atomic, compound (top 3), compound
    tight, contrast, cross-timeframe, wide.

    Args:
        side: ``"buy"`` or ``"sell"``.
        subset: GT rows of that side.
        buy: Buy GT rows (used for separation scoring).
        sell: Sell GT rows (used for separation scoring).
        features: Candidate profile columns for this side.
        next_rid: Next sequence number for atomic rule ids.

    Returns:
        ``(rules, next_rid)`` where ``next_rid`` has been advanced once per
        atomic rule emitted.
    """
    # Distribution is so wide that a rule on it fires across the whole range.
    skip_cols = {"ga_align_trend_score"}
    present = [feat for feat in features if feat in subset.columns]
    rules: list[dict[str, Any]] = []

    if MATCH_INCLUDE_ATOMIC:
        for feat in present:
            if feat in skip_cols:
                continue
            cond = _condition_from_series(subset[feat], side)
            if cond is None:
                continue
            rules.append(
                {
                    "rule_id": f"{side}_a{next_rid:03d}_{feat}",
                    "side": side,
                    "kind": "atomic",
                    "conditions": [cond],
                    "profile_col": feat,
                }
            )
            next_rid += 1

    # Best-separating columns first; the sort is stable, so ties keep the
    # order of ``features``.
    ranked_top = sorted(
        present,
        key=lambda c: _feature_separation(buy, sell, c),
        reverse=True,
    )[:5]

    top3 = ranked_top[:3]
    compound_conds = [
        cond
        for cond in (_condition_from_series(subset[feat], side) for feat in top3)
        if cond
    ]
    if len(compound_conds) >= 2:
        rules.append(
            {
                "rule_id": f"{side}_compound_top3",
                "side": side,
                "kind": "compound",
                "conditions": compound_conds,
                "profile_cols": top3,
            }
        )

    tight_conds = [
        cond
        for cond in (_condition_tight(subset[feat]) for feat in ranked_top[:4])
        if cond
    ]
    if len(tight_conds) >= 2:
        rules.append(
            {
                "rule_id": f"{side}_compound_tight",
                "side": side,
                "kind": "compound_tight",
                "conditions": tight_conds,
            }
        )

    if ranked_top:
        lead = ranked_top[0]
        contrast = _contrast_conditions(buy, sell, lead, side)
        if len(contrast) >= 2:
            rules.append(
                {
                    "rule_id": f"{side}_contrast_{lead}",
                    "side": side,
                    "kind": "contrast",
                    "conditions": contrast,
                }
            )

    if MATCH_INCLUDE_MTF_CROSS:
        cross = _mtf_cross_conditions(buy, sell, features, side)
        if len(cross) >= 3:
            rules.append(
                {
                    "rule_id": f"{side}_mtf_cross_all_tf",
                    "side": side,
                    "kind": "mtf_cross",
                    "conditions": cross,
                }
            )

    if MATCH_INCLUDE_WIDE_RULES:
        for feat in ranked_top[:2]:
            values = pd.to_numeric(subset[feat], errors="coerce").dropna()
            if len(values) < 10:
                continue
            lo, hi = float(values.quantile(0.10)), float(values.quantile(0.90))
            if lo < hi:
                rules.append(
                    {
                        "rule_id": f"{side}_wide_{feat}",
                        "side": side,
                        "kind": "wide",
                        "conditions": [
                            {"col": feat, "op": "between", "lo": lo, "hi": hi}
                        ],
                    }
                )

    return rules, next_rid


def _merge_calibration_rules(
    rules: list[dict[str, Any]],
    cal: dict[str, Any],
) -> list[dict[str, Any]]:
    """
    Combine generated rules with the calibrated rules of the calibration JSON.

    When ``cal["final"]["targets_met"]`` is truthy and calibrated rules
    exist, they replace the generated rules entirely. Otherwise calibrated
    rules whose ``rule_id`` is not already present are appended. Every
    calibrated rule gets ``"logic": "and"`` if it has no ``logic`` key.

    Args:
        rules: Generated rule candidates (extended in place when merging).
        cal: Parsed calibration JSON.

    Returns:
        The rule list to use.
    """
    cal_rules = cal.get("calibrated_rules") or []
    # ``"final": null`` in the JSON must not crash the lookup.
    final = cal.get("final")
    targets_met = isinstance(final, dict) and final.get("targets_met")

    if targets_met and cal_rules:
        rules = []
        for cr in cal_rules:
            cr.setdefault("logic", "and")
            rules.append(cr)
        print(f"[04-1] 캘리브레이션 규칙 적용(90% 달성) → {len(rules)}개")
        return rules

    seen_ids = {r["rule_id"] for r in rules}
    for cr in cal_rules:
        rule_id = cr.get("rule_id")
        if rule_id is not None and rule_id in seen_ids:
            continue
        cr.setdefault("logic", "and")
        rules.append(cr)
        # A rule without an id cannot be de-duplicated; it is kept as-is
        # rather than raising KeyError on ``cr["rule_id"]``.
        if rule_id is not None:
            seen_ids.add(rule_id)
    print(f"[04-1] 캘리브레이션 규칙 병합 → 총 {len(rules)}개")
    return rules


def build_rule_candidates(
    trades_csv: Path | None = None,
) -> dict[str, Any]:
    """
    Generate buy/sell rule candidates from the 03b CSV and the 03c MTF profile.

    Args:
        trades_csv: Path of ``general_analysis_trades.csv``; defaults to
            ``ANALYSIS_TRADES_CSV``.

    Returns:
        A dict with source/profile/calibration paths, the first 50 profile
        features per side, GT counts, ``rule_count`` and the ``rules`` list.

    Raises:
        FileNotFoundError: If the trades CSV does not exist.
    """
    path = trades_csv or ANALYSIS_TRADES_CSV
    if not path.is_file():
        raise FileNotFoundError(f"03b CSV 없음: {path} — scripts/03_analyze_trades.py 먼저 실행")

    df = pd.read_csv(path)
    buy = df[df["action"] == "buy"].copy()
    sell = df[df["action"] == "sell"].copy()
    # The returned analysis is not needed here; the call still refreshes the
    # profile files as a side effect.
    buy_features, sell_features, _profile = _resolve_profile_features(path, df)

    rules: list[dict[str, Any]] = []
    rid = 0  # atomic rule numbering runs across both sides
    for side, subset, features in (
        ("buy", buy, buy_features),
        ("sell", sell, sell_features),
    ):
        side_rules, rid = _build_side_rules(side, subset, buy, sell, features, rid)
        rules.extend(side_rules)

    if ANALYSIS_GT_CALIBRATION_JSON.is_file():
        cal = json.loads(ANALYSIS_GT_CALIBRATION_JSON.read_text(encoding="utf-8"))
        rules = _merge_calibration_rules(rules, cal)

    out = {
        "source": str(path),
        "profile_json": str(ANALYSIS_GT_MTF_PROFILE_JSON),
        "calibration_json": str(ANALYSIS_GT_CALIBRATION_JSON),
        "buy_profile_features": buy_features[:50],
        "sell_profile_features": sell_features[:50],
        "buy_gt_count": int(len(buy)),
        "sell_gt_count": int(len(sell)),
        "rule_count": len(rules),
        "rules": rules,
    }
    print(
        f"[04-1] 규칙 후보 {len(rules)}개 "
        f"(매수 GT {len(buy)}, 매도 GT {len(sell)})"
    )
    return out
|
|
|
|
|
|
def save_rule_candidates(
    data: dict[str, Any],
    out_path: Path,
) -> Path:
    """
    Write the rule candidates to a JSON file.

    The parent directory is created if it does not exist. The file is
    written as UTF-8 with non-ASCII characters kept as-is and an indent of 2.

    Args:
        data: Result of ``build_rule_candidates``.
        out_path: Destination file path.

    Returns:
        ``out_path``.
    """
    target_dir = out_path.parent
    target_dir.mkdir(parents=True, exist_ok=True)

    serialized = json.dumps(data, ensure_ascii=False, indent=2)
    out_path.write_text(serialized, encoding="utf-8")

    print(f"[04-1] 저장: {out_path}")
    return out_path
|