""" 04-3: 규칙 발화별 성과 라벨링 (GT leg 청산 + forward 폴백). """ from __future__ import annotations from typing import Any import numpy as np import pandas as pd from config import ( CHART_LOOKBACK_DAYS, MATCH_FORWARD_BARS, MATCH_LABEL_MODE, MATCH_MAX_HOLD_DAYS, MATCH_PRIMARY_INTERVAL, SYMBOL, TRADING_FEE_RATE, ) from deepcoin.data.mtf_bb import load_frames_from_db from deepcoin.matching.gt_schedule import load_gt_trade_events from deepcoin.ops.monitor import Monitor _NS_PER_DAY = 86_400 * 1_000_000_000 def _forward_ret_vectorized( fire_ts_ns: np.ndarray, c0: np.ndarray, close_ts_ns: np.ndarray, close_px: np.ndarray, side: np.ndarray, n_bars: int, fee_pct: float, ) -> tuple[np.ndarray, np.ndarray]: """ 고정 N봉 forward 수익률(벡터화, 루프 최소). Args: fire_ts_ns: 발화 시각(ns). c0: 발화가. close_ts_ns, close_px: 주간격 종가 시계열. side: buy | sell. n_bars: forward 봉 수. fee_pct: 왕복 수수료 %p. Returns: (ret_pct, valid_mask). """ ret = np.full(len(fire_ts_ns), np.nan, dtype=float) valid = np.zeros(len(fire_ts_ns), dtype=bool) for i in range(len(fire_ts_ns)): idx = np.searchsorted(close_ts_ns, fire_ts_ns[i], side="right") - 1 if idx < 0: continue end = idx + n_bars if end >= len(close_px): continue c_entry = c0[i] c_exit = float(close_px[end]) if side[i] == "buy": ret[i] = (c_exit / c_entry - 1.0) * 100.0 - fee_pct else: ret[i] = (c_entry / c_exit - 1.0) * 100.0 - fee_pct valid[i] = True return ret, valid def _leg_gt_ret_vectorized( fire_ts_ns: np.ndarray, c0: np.ndarray, side: np.ndarray, buy_ts: np.ndarray, buy_px: np.ndarray, sell_ts: np.ndarray, sell_px: np.ndarray, max_hold_days: int, fee_pct: float, ) -> tuple[np.ndarray, np.ndarray, np.ndarray]: """ GT 이벤트 기준 leg 청산 수익률. 매수 발화: 다음 GT 매도까지 보유. 매도 발화: 직전 GT 매수가 대비 청산 수익. Args: fire_ts_ns, c0, side: 발화 배열. buy_ts, buy_px, sell_ts, sell_px: GT 이벤트. max_hold_days: 최대 보유 일수. fee_pct: 왕복 수수료 %p. Returns: (ret_pct, valid_mask, hold_days). """ n = len(fire_ts_ns) ret = np.full(n, np.nan, dtype=float) valid = np.zeros(n, dtype=bool) hold_days = np.full(n, np.nan, dtype=float) max_hold_ns = max_hold_days * _NS_PER_DAY buy_m = side == "buy" if buy_m.any() and len(sell_ts) > 0: t_b = fire_ts_ns[buy_m] idx = np.searchsorted(sell_ts, t_b, side="right") ok = idx < len(sell_ts) if ok.any(): i_ok = np.where(buy_m)[0][ok] exit_ns = sell_ts[idx[ok]] delta = exit_ns - t_b[ok] within = (delta > 0) & (delta <= max_hold_ns) if within.any(): ii = i_ok[within] exit_px = sell_px[idx[ok][within]] entry = c0[ii] ret[ii] = (exit_px / entry - 1.0) * 100.0 - fee_pct valid[ii] = True hold_days[ii] = delta[within] / _NS_PER_DAY sell_m = side == "sell" if sell_m.any() and len(buy_ts) > 0: t_s = fire_ts_ns[sell_m] idx = np.searchsorted(buy_ts, t_s, side="left") - 1 ok = idx >= 0 if ok.any(): i_ok = np.where(sell_m)[0][ok] entry_ns = buy_ts[idx[ok]] delta = t_s[ok] - entry_ns within = (delta > 0) & (delta <= max_hold_ns) if within.any(): ii = i_ok[within] entry_px = buy_px[idx[ok][within]] exit_p = c0[ii] ret[ii] = (exit_p / entry_px - 1.0) * 100.0 - fee_pct valid[ii] = True hold_days[ii] = delta[within] / _NS_PER_DAY return ret, valid, hold_days def label_fire_outcomes( fires: pd.DataFrame, frames: dict[int, pd.DataFrame] | None = None, ) -> pd.DataFrame: """ 각 발화의 성과를 라벨링합니다. MATCH_LABEL_MODE=leg_gt: GT 다음 매도/직전 매수 기준. 미충족 분은 forward N봉으로 폴백. Args: fires: rule_fires. frames: OHLCV (폴백용). Returns: fire_outcomes (+ forward_ret_pct, label_method, hold_days, ...). """ if fires.empty: return fires.copy() fee_pct = TRADING_FEE_RATE * 2 * 100 fts = pd.to_datetime(fires["dt"]) fire_ts_ns = fts.values.astype("datetime64[ns]").astype(np.int64) c0 = fires["close"].astype(float).values side = fires["side"].astype(str).values buy_ts, buy_px, sell_ts, sell_px = load_gt_trade_events() label_method = np.full(len(fires), "", dtype=object) hold_days = np.full(len(fires), np.nan, dtype=float) ret = np.full(len(fires), np.nan, dtype=float) if MATCH_LABEL_MODE == "leg_gt": ret, valid, hd = _leg_gt_ret_vectorized( fire_ts_ns, c0, side, buy_ts, buy_px, sell_ts, sell_px, MATCH_MAX_HOLD_DAYS, fee_pct, ) label_method[valid] = "leg_gt" hold_days = hd need_fb = ~np.isfinite(ret) if need_fb.any(): if frames is None: mon = Monitor(cooldown_file=None) frames = load_frames_from_db(mon, SYMBOL, lookback_days=CHART_LOOKBACK_DAYS) if need_fb.any() and frames is not None: raw = frames.get(MATCH_PRIMARY_INTERVAL) if raw is not None and not raw.empty: px = raw.copy() if not isinstance(px.index, pd.DatetimeIndex): px.index = pd.to_datetime(px.index) px = px.sort_index() col = "close" if "close" in px.columns else "Close" close_px = px[col].astype(float).values close_ts_ns = px.index.astype(np.int64).values fb_ret, fb_ok = _forward_ret_vectorized( fire_ts_ns[need_fb], c0[need_fb], close_ts_ns, close_px, side[need_fb], MATCH_FORWARD_BARS, fee_pct, ) ret[need_fb] = np.where(fb_ok, fb_ret, np.nan) fb_idx = np.where(need_fb)[0][fb_ok] label_method[fb_idx] = f"forward_{MATCH_FORWARD_BARS}" out = fires.copy() out["forward_ret_pct"] = np.round(ret, 4) out["win"] = (ret > 0).astype(int) out["label_method"] = label_method out["hold_days"] = np.round(hold_days, 2) out["forward_bars"] = MATCH_FORWARD_BARS out = out[np.isfinite(out["forward_ret_pct"])].reset_index(drop=True) leg_n = int((out["label_method"] == "leg_gt").sum()) fb_n = int(out["label_method"].astype(str).str.startswith("forward").sum()) print( f"[04-3] 성과 라벨: {len(out):,}건 " f"(mode={MATCH_LABEL_MODE}, leg_gt={leg_n:,}, forward폴백={fb_n:,}, " f"수수료 {fee_pct:.3f}%p)" ) return out