미래 데이터를 쓰지 않는 causal 신호/tier와 전기간 복리 포트폴리오 비교로 GT 대비 sim_sized 검증 경로를 정리하고, 일한도·매수 상한·live_buy 스케일을 제거한다. Co-authored-by: Cursor <cursoragent@cursor.com>
335 lines
11 KiB
Python
335 lines
11 KiB
Python
"""
|
||
규칙 조건 벡터 평가·MTF 스캔 프레임 병합.
|
||
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
from typing import Any
|
||
|
||
import numpy as np
|
||
import pandas as pd
|
||
|
||
from config import GENERAL_ANALYSIS_INTERVALS, MATCH_PRIMARY_INTERVAL
|
||
from deepcoin.analysis.general_analysis_core import interval_tf_prefix
|
||
from deepcoin.analysis.general_analysis_pipeline import general_analysis_enrich_bars
|
||
from config import (
|
||
ALIGN_RSI_CONFLICT_TIMING_HIGH,
|
||
ALIGN_RSI_CONFLICT_TIMING_LOW,
|
||
ALIGN_RSI_CONFLICT_TREND_HIGH,
|
||
ALIGN_RSI_CONFLICT_TREND_LOW,
|
||
ALIGN_RSI_OVERBOUGHT,
|
||
ALIGN_RSI_OVERSOLD,
|
||
TIMING_INTERVALS,
|
||
TREND_INTERVALS,
|
||
)
|
||
from deepcoin.analysis.general_analysis_core import ga_col
|
||
|
||
|
||
def _add_align_columns_vectorized(frame: pd.DataFrame) -> pd.DataFrame:
    """
    Append the ga_align_* multi-timeframe alignment columns to a scan frame.

    The input is copied first; all work is done with whole-column operations.

    Args:
        frame: DataFrame holding timeframe-prefixed columns, i.e.
            ``<prefix>_RSI`` and ``<prefix>_<ga_col('struct_trend')>``.

    Returns:
        A copy of ``frame`` with these columns added:
        ``ga_align_rsi_oversold_tf`` / ``ga_align_rsi_overbought_tf`` (per-row
        count of timing intervals whose RSI is below / above the configured
        threshold), ``ga_align_trend_up_tf`` / ``ga_align_trend_down_tf``
        (count of trend intervals whose structure trend equals "up" / "down"),
        ``ga_align_timing_buy_score``, ``ga_align_timing_sell_score`` and
        ``ga_align_trend_score`` (counts divided by the number of intervals
        actually present, rounded to 3 decimals), and
        ``ga_align_mtf_conflict`` (0/1).
    """
    result = frame.copy()
    idx = result.index

    # Timing side: count, per row, how many timing intervals sit beyond each RSI threshold.
    oversold_cnt = pd.Series(0, index=idx, dtype=float)
    overbought_cnt = pd.Series(0, index=idx, dtype=float)
    timing_seen = 0
    for interval in TIMING_INTERVALS:
        rsi_name = f"{interval_tf_prefix(interval)}_RSI"
        if rsi_name in result.columns:
            timing_seen += 1
            rsi_values = pd.to_numeric(result[rsi_name], errors="coerce")
            oversold_cnt = oversold_cnt + (rsi_values < ALIGN_RSI_OVERSOLD).astype(int)
            overbought_cnt = overbought_cnt + (rsi_values > ALIGN_RSI_OVERBOUGHT).astype(int)

    # Trend side: count the trend intervals labelled "up" and "down".
    up_cnt = pd.Series(0, index=idx, dtype=float)
    down_cnt = pd.Series(0, index=idx, dtype=float)
    trend_seen = 0
    for interval in TREND_INTERVALS:
        trend_name = f"{interval_tf_prefix(interval)}_{ga_col('struct_trend')}"
        if trend_name in result.columns:
            trend_seen += 1
            labels = result[trend_name].astype(str)
            up_cnt = up_cnt + (labels == "up").astype(int)
            down_cnt = down_cnt + (labels == "down").astype(int)

    # Divide by at least 1 so a frame with none of the expected columns yields 0 scores.
    timing_div = timing_seen or 1
    trend_div = trend_seen or 1

    result["ga_align_rsi_oversold_tf"] = oversold_cnt
    result["ga_align_rsi_overbought_tf"] = overbought_cnt
    result["ga_align_trend_up_tf"] = up_cnt
    result["ga_align_trend_down_tf"] = down_cnt
    result["ga_align_timing_buy_score"] = (oversold_cnt / timing_div).round(3)
    result["ga_align_timing_sell_score"] = (overbought_cnt / timing_div).round(3)
    result["ga_align_trend_score"] = ((up_cnt - down_cnt) / trend_div).round(3)

    # Conflict flag: m3 RSI and d1 RSI sitting on opposite sides of their thresholds.
    m3_raw = result.get("m3_RSI")
    d1_raw = result.get("d1_RSI")
    if m3_raw is None or d1_raw is None:
        conflict_flag = pd.Series(0, index=idx, dtype=int)
    else:
        m3_num = pd.to_numeric(m3_raw, errors="coerce")
        d1_num = pd.to_numeric(d1_raw, errors="coerce")
        m3_low_d1_high = (m3_num < ALIGN_RSI_CONFLICT_TIMING_LOW) & (
            d1_num > ALIGN_RSI_CONFLICT_TREND_HIGH
        )
        m3_high_d1_low = (m3_num > ALIGN_RSI_CONFLICT_TIMING_HIGH) & (
            d1_num < ALIGN_RSI_CONFLICT_TREND_LOW
        )
        conflict_flag = (m3_low_d1_high | m3_high_d1_low).astype(int)
    result["ga_align_mtf_conflict"] = conflict_flag
    return result
|
||
|
||
|
||
def _scalar_float(val: Any) -> float:
    """Return ``val`` as a float; a pandas Series contributes its first element."""
    first = val.iloc[0] if isinstance(val, pd.Series) else val
    return float(first)
|
||
|
||
|
||
def conditions_columns(rules: list[dict[str, Any]]) -> set[str]:
    """
    Collect every column name referenced by the conditions of the given rules.

    Args:
        rules: Rule dicts; each may carry a "conditions" list of dicts that
            have a "col" key.

    Returns:
        Set of the truthy "col" values found. A rule whose "conditions" entry
        is missing, ``None`` or empty contributes nothing.
    """
    cols: set[str] = set()
    for rule in rules:
        # `or ()` also covers an explicit None, matching how eval_rule_mask
        # reads the same key; `.get(..., [])` alone would fail on None.
        for cond in rule.get("conditions") or ():
            name = cond.get("col")
            if name:
                cols.add(name)
    return cols
|
||
|
||
|
||
def build_mtf_scan_frame(
    frames: dict[int, pd.DataFrame],
    needed_cols: set[str],
) -> pd.DataFrame:
    """
    Build the wide scan DataFrame on the primary interval's timestamps.

    Every interval in GENERAL_ANALYSIS_INTERVALS that has data is enriched via
    ``general_analysis_enrich_bars``. Requested ``<prefix>_<column>`` names are
    then resolved against the matching interval's enriched frame: primary
    interval columns are taken directly, other intervals are attached with a
    backward ``merge_asof`` on the timestamp. Requested names that cannot be
    resolved are skipped silently.

    Args:
        frames: Mapping of interval -> bar DataFrame for that interval.
        needed_cols: Column names the rules need.

    Returns:
        DataFrame indexed by the primary interval's (sorted, datetime)
        index with a float "close" column plus the resolved columns. When any
        ``ga_align_*`` name is requested the alignment columns are added; when
        any ``gt_*`` name or "bb_pos" is requested the frame is passed through
        ``enrich_scan_frame_gt_signals(..., causal=True)``. Duplicate column
        names are dropped (first kept) and rows with a NaN close are removed.

    Raises:
        RuntimeError: The primary interval has no data in ``frames``.
    """
    primary = MATCH_PRIMARY_INTERVAL
    raw = frames.get(primary)
    if raw is None or raw.empty:
        raise RuntimeError(f"주간격 {primary}분 데이터 없음")

    print("[04b] Phase A: 8TF enrich (스캔용)...")
    enriched: dict[int, pd.DataFrame] = {}
    for iv in GENERAL_ANALYSIS_INTERVALS:
        r = frames.get(iv)
        if r is None or r.empty:
            continue
        label = interval_tf_prefix(iv)
        print(f" enrich {label} ({len(r):,}봉)...")
        enriched[iv] = general_analysis_enrich_bars(r, iv, full_context=True)

    # NOTE(review): assumes MATCH_PRIMARY_INTERVAL is one of
    # GENERAL_ANALYSIS_INTERVALS; otherwise this lookup raises KeyError.
    base = enriched[primary].copy()
    if not isinstance(base.index, pd.DatetimeIndex):
        base.index = pd.to_datetime(base.index)
    base = base.sort_index()

    out = pd.DataFrame(index=base.index)
    close_col = "close" if "close" in base.columns else "Close"
    out["close"] = base[close_col].astype(float)

    for iv in GENERAL_ANALYSIS_INTERVALS:
        ef = enriched.get(iv)
        if ef is None:
            continue
        prefix = f"{interval_tf_prefix(iv)}_"

        # Map requested scan column -> source column in this interval's frame,
        # e.g. m3_RSI -> RSI, m60_ga_struct_trend -> ga_struct_trend.
        wanted: dict[str, str] = {}
        for col in needed_cols:
            if col in out.columns or not col.startswith(prefix):
                continue
            src = col[len(prefix):]
            if src in ef.columns:
                wanted[col] = src
        if not wanted:
            continue

        if iv == primary:
            # Read from `base`, not the raw enriched frame: `base` carries the
            # normalised DatetimeIndex that `out` is built on, so label
            # alignment cannot silently produce all-NaN columns.
            for col, src in wanted.items():
                out[col] = base[src].reindex(out.index)
            continue

        sub = ef[list(wanted.values())].copy()
        if not isinstance(sub.index, pd.DatetimeIndex):
            sub.index = pd.to_datetime(sub.index)
        sub = sub.sort_index().rename(
            columns={src: col for col, src in wanted.items()}
        )
        # One as-of join per interval: each scan row takes the latest
        # other-interval row whose timestamp is <= the scan timestamp.
        merged = pd.merge_asof(
            pd.DataFrame({"_ts": out.index}),
            sub.reset_index(names="_ts"),
            on="_ts",
            direction="backward",
        )
        for col in wanted:
            out[col] = merged[col].values

    if any(c.startswith("ga_align_") for c in needed_cols):
        out = _add_align_columns_vectorized(out)

    gt_needed = any(c.startswith("gt_") for c in needed_cols)
    if gt_needed or "bb_pos" in needed_cols:
        # Raw price/volume columns the GT signal enrichment is given, again
        # taken from the index-normalised primary frame.
        for src in ("Low", "High", "low", "high", "bb_pos", "Open", "Volume"):
            if src in base.columns and src not in out.columns:
                out[src] = base[src].reindex(out.index)
        if "Low" not in out.columns and "low" in out.columns:
            out["Low"] = out["low"]
        if "High" not in out.columns and "high" in out.columns:
            out["High"] = out["high"]
        from deepcoin.ground_truth.gt_signal_rules import enrich_scan_frame_gt_signals

        # Simulation / live scan: entry decisions are always causal
        # (ground-truth label generation lives separately in ground_truth.py).
        out = enrich_scan_frame_gt_signals(out, causal=True)

    out = out.loc[:, ~out.columns.duplicated()]
    out = out.dropna(subset=["close"])
    print(f"[04b] 스캔 프레임: {len(out):,}봉 × {len(out.columns)}열")
    return out
|
||
|
||
|
||
def _eval_one_condition(
    frame: pd.DataFrame,
    cond: dict[str, Any],
) -> pd.Series:
    """
    Evaluate a single rule condition against every row of ``frame``.

    Supported ``cond["op"]`` values (default "between"):
        between: ``lo <= value <= hi`` on the numeric-coerced column.
        gte / lte: numeric-coerced column ``>=`` / ``<=`` ``cond["value"]``.
        eq: numeric equality when ``cond["value"]`` is int/float and the
            column dtype is numeric, otherwise string equality.
        eq_int: numeric-coerced column cast to int equals
            ``int(cond["value"])``; missing and infinite values never match.
    Any other op matches no row.

    Args:
        frame: DataFrame to evaluate.
        cond: Condition dict with "col", optional "op" and the operands the
            chosen op reads ("lo"/"hi" or "value").

    Returns:
        Boolean Series on ``frame.index``; all False when "col" is missing
        from ``cond`` or from ``frame``.
    """
    col = cond.get("col")
    if not col or col not in frame.columns:
        return pd.Series(False, index=frame.index)

    s = frame[col]
    op = cond.get("op", "between")
    if op == "between":
        lo, hi = float(cond["lo"]), float(cond["hi"])
        num = pd.to_numeric(s, errors="coerce")
        part = (num >= lo) & (num <= hi)
    elif op == "gte":
        part = pd.to_numeric(s, errors="coerce") >= float(cond["value"])
    elif op == "lte":
        part = pd.to_numeric(s, errors="coerce") <= float(cond["value"])
    elif op == "eq":
        val = cond["value"]
        if isinstance(val, (int, float)) and pd.api.types.is_numeric_dtype(s):
            part = pd.to_numeric(s, errors="coerce") == float(val)
        else:
            part = s.astype(str) == str(val)
    elif op == "eq_int":
        num = pd.to_numeric(s, errors="coerce")
        # Mask unusable rows explicitly instead of filling them with a
        # sentinel: a sentinel would match a rule asking for that same value,
        # and +/-inf cannot be cast to int at all.
        usable = num.notna() & ~num.isin([np.inf, -np.inf])
        part = usable & (num.where(usable, 0).astype(int) == int(cond["value"]))
    else:
        part = pd.Series(False, index=frame.index)
    return part.fillna(False)
|
||
|
||
|
||
def eval_conditions(frame: pd.DataFrame, conditions: list[dict[str, Any]]) -> pd.Series:
    """
    AND together the per-condition masks of one rule.

    Args:
        frame: Scan DataFrame.
        conditions: List of condition dicts ({col, op, ...}).

    Returns:
        Boolean Series on ``frame.index``; all True when ``conditions`` is
        empty.
    """
    combined = pd.Series(True, index=frame.index)
    for single in conditions:
        combined = combined & _eval_one_condition(frame, single)
    return combined
|
||
|
||
|
||
def eval_rule_mask(frame: pd.DataFrame, rule: dict[str, Any]) -> pd.Series:
    """
    Evaluate a rule dict, combining its conditions by ``logic`` (and | or).

    Args:
        frame: Scan or snapshot DataFrame.
        rule: Dict with a "conditions" list and an optional "logic" key
            (case-insensitive; anything other than "or" is treated as AND).

    Returns:
        Boolean Series on ``frame.index``; all False when the rule has no
        conditions.
    """
    conds = rule.get("conditions") or []
    if not conds:
        return pd.Series(False, index=frame.index)

    if str(rule.get("logic", "and")).lower() != "or":
        return eval_conditions(frame, conds)

    any_hit = pd.Series(False, index=frame.index)
    for single in conds:
        any_hit = any_hit | _eval_one_condition(frame, single)
    return any_hit
|
||
|
||
|
||
def scan_rule_fires(
    frame: pd.DataFrame,
    rules: list[dict[str, Any]],
) -> pd.DataFrame:
    """
    Collect the rows at which each candidate rule fires.

    Args:
        frame: Scan DataFrame (result of build_mtf_scan_frame); needs a
            "close" column and index values that support ``strftime``.
        rules: Rule candidates; each needs "rule_id" and "side" plus whatever
            ``eval_rule_mask`` reads.

    Returns:
        DataFrame with columns fire_id, rule_id, side, dt, close: one row per
        (rule, firing row). ``fire_id`` counts up from 0 across all rules,
        ``dt`` is formatted "%Y-%m-%d %H:%M:%S". An empty frame with those
        columns is returned when nothing fires.
    """
    columns = ["fire_id", "rule_id", "side", "dt", "close"]
    if not rules:
        return pd.DataFrame(columns=columns)

    # The close column does not depend on the rule, so extract it once.
    close_s = frame["close"]
    if isinstance(close_s, pd.DataFrame):
        # Duplicate "close" column names: use the first one.
        close_s = close_s.iloc[:, 0]
    close_values = close_s.to_numpy()

    rows: list[dict[str, Any]] = []
    for rule in rules:
        rid = rule["rule_id"]
        side = rule["side"]
        hit_mask = eval_rule_mask(frame, rule).to_numpy(dtype=bool)
        hits = frame.index[hit_mask]
        # Select closes by position with the same mask that produced `hits`:
        # each hit gets its own row's close even when timestamps repeat, and
        # no per-hit label lookup is needed.
        for ts, price in zip(hits, close_values[hit_mask]):
            rows.append(
                {
                    "fire_id": len(rows),
                    "rule_id": rid,
                    "side": side,
                    "dt": ts.strftime("%Y-%m-%d %H:%M:%S"),
                    "close": float(price),
                }
            )
        print(f" 규칙 {rid}: 발화 {len(hits):,}건")

    if not rows:
        return pd.DataFrame(columns=columns)
    return pd.DataFrame(rows)
|