Bithumb/deepcoin/matching/rule_eval.py

"""
규칙 조건 벡터 평가·MTF 스캔 프레임 병합.
"""

from __future__ import annotations

from typing import Any

import numpy as np
import pandas as pd

from config import GENERAL_ANALYSIS_INTERVALS, MATCH_PRIMARY_INTERVAL
from deepcoin.analysis.general_analysis_core import interval_tf_prefix
from deepcoin.analysis.general_analysis_pipeline import general_analysis_enrich_bars
from config import (
    ALIGN_RSI_CONFLICT_TIMING_HIGH,
    ALIGN_RSI_CONFLICT_TIMING_LOW,
    ALIGN_RSI_CONFLICT_TREND_HIGH,
    ALIGN_RSI_CONFLICT_TREND_LOW,
    ALIGN_RSI_OVERBOUGHT,
    ALIGN_RSI_OVERSOLD,
    TIMING_INTERVALS,
    TREND_INTERVALS,
)
from deepcoin.analysis.general_analysis_core import ga_col


def _add_align_columns_vectorized(frame: pd.DataFrame) -> pd.DataFrame:
    """
    스캔 프레임에 ga_align_* 컬럼을 벡터 연산으로 추가합니다.

    Args:
        frame: TF 접두사 컬럼이 포함된 DataFrame.

    Returns:
        align 컬럼이 추가된 DataFrame.
    """
    out = frame.copy()
    rsi_oversold = pd.Series(0, index=out.index, dtype=float)
    rsi_overbought = pd.Series(0, index=out.index, dtype=float)
    n_timing = 0
    for iv in TIMING_INTERVALS:
        p = interval_tf_prefix(iv)
        rk = f"{p}_RSI"
        if rk not in out.columns:
            continue
        n_timing += 1
        rsi = pd.to_numeric(out[rk], errors="coerce")
        rsi_oversold += (rsi < ALIGN_RSI_OVERSOLD).astype(int)
        rsi_overbought += (rsi > ALIGN_RSI_OVERBOUGHT).astype(int)

    trend_up = pd.Series(0, index=out.index, dtype=float)
    trend_down = pd.Series(0, index=out.index, dtype=float)
    n_trend = 0
    for iv in TREND_INTERVALS:
        p = interval_tf_prefix(iv)
        sk = f"{p}_{ga_col('struct_trend')}"
        if sk not in out.columns:
            continue
        n_trend += 1
        st = out[sk].astype(str)
        trend_up += (st == "up").astype(int)
        trend_down += (st == "down").astype(int)

    denom_t = max(n_timing, 1)
    denom_r = max(n_trend, 1)
    out["ga_align_rsi_oversold_tf"] = rsi_oversold
    out["ga_align_rsi_overbought_tf"] = rsi_overbought
    out["ga_align_trend_up_tf"] = trend_up
    out["ga_align_trend_down_tf"] = trend_down
    out["ga_align_timing_buy_score"] = (rsi_oversold / denom_t).round(3)
    out["ga_align_timing_sell_score"] = (rsi_overbought / denom_t).round(3)
    out["ga_align_trend_score"] = ((trend_up - trend_down) / denom_r).round(3)

    conflict = pd.Series(0, index=out.index, dtype=int)
    m3_rsi = out.get("m3_RSI")
    d1_rsi = out.get("d1_RSI")
    if m3_rsi is not None and d1_rsi is not None:
        m3v = pd.to_numeric(m3_rsi, errors="coerce")
        d1v = pd.to_numeric(d1_rsi, errors="coerce")
        conflict = (
            ((m3v < ALIGN_RSI_CONFLICT_TIMING_LOW) & (d1v > ALIGN_RSI_CONFLICT_TREND_HIGH))
            | ((m3v > ALIGN_RSI_CONFLICT_TIMING_HIGH) & (d1v < ALIGN_RSI_CONFLICT_TREND_LOW))
        ).astype(int)
    out["ga_align_mtf_conflict"] = conflict
    return out


def _scalar_float(val: Any) -> float:
    """Series/ndarray 스칼라를 float로 변환."""
    if isinstance(val, pd.Series):
        val = val.iloc[0]
    return float(val)


def conditions_columns(rules: list[dict[str, Any]]) -> set[str]:
    """
    규칙 목록에서 참조하는 컬럼명 집합을 반환합니다.

    Args:
        rules: rule_candidates 항목 리스트.

    Returns:
        컬럼명 set.
    """
    cols: set[str] = set()
    for rule in rules:
        for cond in rule.get("conditions", []):
            c = cond.get("col")
            if c:
                cols.add(c)
    return cols


def build_mtf_scan_frame(
    frames: dict[int, pd.DataFrame],
    needed_cols: set[str],
) -> pd.DataFrame:
    """
    주간격(m3) 인덱스에 필요 컬럼만 merge_asof로 붙인 스캔용 DataFrame을 만듭니다.

    Args:
        frames: interval → OHLCV.
        needed_cols: 규칙 평가에 필요한 컬럼명.

    Returns:
        m3 인덱스 wide DataFrame (close 포함).
    """
    primary = MATCH_PRIMARY_INTERVAL
    raw = frames.get(primary)
    if raw is None or raw.empty:
        raise RuntimeError(f"주간격 {primary}분 데이터 없음")

    print(f"[04b] Phase A: 8TF enrich (스캔용)...")
    enriched: dict[int, pd.DataFrame] = {}
    for iv in GENERAL_ANALYSIS_INTERVALS:
        r = frames.get(iv)
        if r is None or r.empty:
            continue
        label = interval_tf_prefix(iv)
        print(f"  enrich {label} ({len(r):,}봉)...")
        enriched[iv] = general_analysis_enrich_bars(r, iv, full_context=True)

    base = enriched[primary].copy()
    if not isinstance(base.index, pd.DatetimeIndex):
        base.index = pd.to_datetime(base.index)
    base = base.sort_index()
    out = pd.DataFrame(index=base.index)
    close_col = "close" if "close" in base.columns else "Close"
    out["close"] = base[close_col].astype(float)

    def _source_col(prefixed: str, prefix: str, ef: pd.DataFrame) -> str | None:
        """m3_RSI → RSI, m60_ga_struct_trend → ga_struct_trend."""
        if not prefixed.startswith(f"{prefix}_"):
            return None
        suffix = prefixed[len(prefix) + 1 :]
        if suffix in ef.columns:
            return suffix
        return None

    for iv in GENERAL_ANALYSIS_INTERVALS:
        ef = enriched.get(iv)
        if ef is None:
            continue
        p = interval_tf_prefix(iv)
        for col in needed_cols:
            if col in out.columns or not col.startswith(f"{p}_"):
                continue
            src = _source_col(col, p, ef)
            if src is None:
                continue
            if iv == primary:
                out[col] = ef[src].reindex(out.index)
            else:
                sub = ef[[src]].copy()
                if not isinstance(sub.index, pd.DatetimeIndex):
                    sub.index = pd.to_datetime(sub.index)
                sub = sub.sort_index().rename(columns={src: col})
                merged = pd.merge_asof(
                    out.reset_index(names="_ts"),
                    sub.reset_index(names="_ts"),
                    on="_ts",
                    direction="backward",
                ).set_index("_ts")
                out[col] = merged[col].values

    align_needed = [c for c in needed_cols if c.startswith("ga_align_")]
    if align_needed:
        out = _add_align_columns_vectorized(out)

    gt_needed = [c for c in needed_cols if c.startswith("gt_")]
    bb_in_rules = "bb_pos" in needed_cols
    if gt_needed or bb_in_rules:
        ef = enriched[primary]
        for src in ("Low", "High", "low", "high", "bb_pos", "Open", "Volume"):
            if src in ef.columns and src not in out.columns:
                out[src] = ef[src].reindex(out.index)
        if "Low" not in out.columns and "low" in out.columns:
            out["Low"] = out["low"]
        if "High" not in out.columns and "high" in out.columns:
            out["High"] = out["high"]
        from deepcoin.ground_truth.gt_signal_rules import enrich_scan_frame_gt_signals

        # 시뮬·live 스캔: 타점 판단은 항상 인과적 (GT 정답 생성은 ground_truth.py 별도)
        out = enrich_scan_frame_gt_signals(out, causal=True)

    out = out.loc[:, ~out.columns.duplicated()]
    out = out.dropna(subset=["close"])
    print(f"[04b] 스캔 프레임: {len(out):,}봉 × {len(out.columns)}열")
    return out


def _eval_one_condition(
    frame: pd.DataFrame,
    cond: dict[str, Any],
) -> pd.Series:
    """
    단일 조건 boolean Series.

    Args:
        frame: 평가 대상.
        cond: {col, op, ...}.

    Returns:
        boolean Series.
    """
    col = cond.get("col")
    if not col or col not in frame.columns:
        return pd.Series(False, index=frame.index)
    s = frame[col]
    op = cond.get("op", "between")
    if op == "between":
        lo, hi = float(cond["lo"]), float(cond["hi"])
        ok = pd.to_numeric(s, errors="coerce")
        part = (ok >= lo) & (ok <= hi)
    elif op == "gte":
        part = pd.to_numeric(s, errors="coerce") >= float(cond["value"])
    elif op == "lte":
        part = pd.to_numeric(s, errors="coerce") <= float(cond["value"])
    elif op == "eq":
        val = cond["value"]
        if isinstance(val, (int, float)) and pd.api.types.is_numeric_dtype(s):
            part = pd.to_numeric(s, errors="coerce") == float(val)
        else:
            part = s.astype(str) == str(val)
    elif op == "eq_int":
        part = (
            pd.to_numeric(s, errors="coerce").fillna(-999).astype(int)
            == int(cond["value"])
        )
    else:
        part = pd.Series(False, index=frame.index)
    return part.fillna(False)


def eval_conditions(frame: pd.DataFrame, conditions: list[dict[str, Any]]) -> pd.Series:
    """
    단일 규칙의 조건을 모두 AND로 평가합니다.

    Args:
        frame: 스캔용 DataFrame.
        conditions: {col, op, ...} 리스트.

    Returns:
        boolean Series (인덱스=frame.index).
    """
    mask = pd.Series(True, index=frame.index)
    for cond in conditions:
        mask &= _eval_one_condition(frame, cond)
    return mask


def eval_rule_mask(frame: pd.DataFrame, rule: dict[str, Any]) -> pd.Series:
    """
    규칙 dict 평가 (logic=and|or).

    Args:
        frame: 스캔/스냅샷 DataFrame.
        rule: conditions, logic 키 포함.

    Returns:
        boolean Series.
    """
    conditions = rule.get("conditions") or []
    if not conditions:
        return pd.Series(False, index=frame.index)
    logic = str(rule.get("logic", "and")).lower()
    if logic == "or":
        mask = pd.Series(False, index=frame.index)
        for cond in conditions:
            mask |= _eval_one_condition(frame, cond)
        return mask
    return eval_conditions(frame, conditions)


def scan_rule_fires(
    frame: pd.DataFrame,
    rules: list[dict[str, Any]],
) -> pd.DataFrame:
    """
    모든 규칙 후보에 대해 발화 시각을 수집합니다.

    Args:
        frame: build_mtf_scan_frame 결과.
        rules: rule_candidates.

    Returns:
        fire_id, rule_id, side, dt, close 컬럼 DataFrame.
    """
    rows: list[dict[str, Any]] = []
    fid = 0
    for rule in rules:
        rid = rule["rule_id"]
        side = rule["side"]
        mask = eval_rule_mask(frame, rule)
        hits = frame.index[mask]
        close_s = frame["close"]
        if isinstance(close_s, pd.DataFrame):
            close_s = close_s.iloc[:, 0]
        for ts in hits:
            rows.append(
                {
                    "fire_id": fid,
                    "rule_id": rid,
                    "side": side,
                    "dt": ts.strftime("%Y-%m-%d %H:%M:%S"),
                    "close": _scalar_float(close_s.loc[ts]),
                }
            )
            fid += 1
        print(f"  규칙 {rid}: 발화 {len(hits):,}건")
    if not rows:
        return pd.DataFrame(columns=["fire_id", "rule_id", "side", "dt", "close"])
    return pd.DataFrame(rows)