Files
Bithumb/deepcoin/matching/label_outcomes.py
dsyoon 2cb67c42b3 GT MTF 프로필·캘리브레이션과 04 매칭/시뮬/실거래 파이프라인을 추가한다.
3분~일봉 GT 타점 분석(03c), leg 체결 순서 수정, 총자산 90% 검증 루프,
walk-forward Go/No-Go 시뮬, monitor·live_trader 및 reference 문서를 포함한다.

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-05-31 11:27:50 +09:00

232 lines
7.1 KiB
Python

"""
04-3: 규칙 발화별 성과 라벨링 (GT leg 청산 + forward 폴백).
"""
from __future__ import annotations
from typing import Any
import numpy as np
import pandas as pd
from config import (
CHART_LOOKBACK_DAYS,
MATCH_FORWARD_BARS,
MATCH_LABEL_MODE,
MATCH_MAX_HOLD_DAYS,
MATCH_PRIMARY_INTERVAL,
SYMBOL,
TRADING_FEE_RATE,
)
from deepcoin.data.mtf_bb import load_frames_from_db
from deepcoin.matching.gt_schedule import load_gt_trade_events
from deepcoin.ops.monitor import Monitor
# Number of nanoseconds in one day (86,400 s * 1e9 ns/s). Used to convert a
# hold limit given in days into a nanosecond delta and back.
_NS_PER_DAY = 86_400 * 1_000_000_000
def _forward_ret_vectorized(
fire_ts_ns: np.ndarray,
c0: np.ndarray,
close_ts_ns: np.ndarray,
close_px: np.ndarray,
side: np.ndarray,
n_bars: int,
fee_pct: float,
) -> tuple[np.ndarray, np.ndarray]:
"""
고정 N봉 forward 수익률(벡터화, 루프 최소).
Args:
fire_ts_ns: 발화 시각(ns).
c0: 발화가.
close_ts_ns, close_px: 주간격 종가 시계열.
side: buy | sell.
n_bars: forward 봉 수.
fee_pct: 왕복 수수료 %p.
Returns:
(ret_pct, valid_mask).
"""
ret = np.full(len(fire_ts_ns), np.nan, dtype=float)
valid = np.zeros(len(fire_ts_ns), dtype=bool)
for i in range(len(fire_ts_ns)):
idx = np.searchsorted(close_ts_ns, fire_ts_ns[i], side="right") - 1
if idx < 0:
continue
end = idx + n_bars
if end >= len(close_px):
continue
c_entry = c0[i]
c_exit = float(close_px[end])
if side[i] == "buy":
ret[i] = (c_exit / c_entry - 1.0) * 100.0 - fee_pct
else:
ret[i] = (c_entry / c_exit - 1.0) * 100.0 - fee_pct
valid[i] = True
return ret, valid
def _leg_gt_ret_vectorized(
    fire_ts_ns: np.ndarray,
    c0: np.ndarray,
    side: np.ndarray,
    buy_ts: np.ndarray,
    buy_px: np.ndarray,
    sell_ts: np.ndarray,
    sell_px: np.ndarray,
    max_hold_days: int,
    fee_pct: float,
) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
    """
    Compute leg-closing returns relative to ground-truth (GT) trade events.

    Buy fire: enter at the fire price and exit at the first GT sell strictly
    after the fire. Sell fire: exit at the fire price against the price of
    the last GT buy strictly before the fire. A leg only counts when its
    duration is positive and does not exceed ``max_hold_days``.

    Args:
        fire_ts_ns: Fire timestamps as integer nanoseconds.
        c0: Price at each fire.
        side: Per-fire side; only ``"buy"`` and ``"sell"`` are labelled.
        buy_ts, buy_px: GT buy event times and prices.
        sell_ts, sell_px: GT sell event times and prices.
            NOTE(review): both event time arrays are assumed to be sorted
            ascending and in the same ns unit as ``fire_ts_ns`` -- confirm
            against the loader.
        max_hold_days: Maximum leg duration in days.
        fee_pct: Round-trip fee in percentage points, subtracted from each
            return.

    Returns:
        ``(ret_pct, valid_mask, hold_days)``; ``ret_pct`` and ``hold_days``
        are NaN for fires that could not be matched to a GT leg.
    """
    n_fires = len(fire_ts_ns)
    ret = np.full(n_fires, np.nan, dtype=float)
    valid = np.zeros(n_fires, dtype=bool)
    hold_days = np.full(n_fires, np.nan, dtype=float)
    hold_limit_ns = max_hold_days * _NS_PER_DAY

    # --- Buy fires: hold until the next GT sell. ---
    buy_mask = side == "buy"
    if buy_mask.any() and len(sell_ts) > 0:
        buy_rows = np.flatnonzero(buy_mask)
        fired_at = fire_ts_ns[buy_rows]
        # side="right" -> first GT sell strictly later than the fire.
        next_sell = np.searchsorted(sell_ts, fired_at, side="right")
        has_exit = next_sell < len(sell_ts)
        if has_exit.any():
            rows = buy_rows[has_exit]
            pos = next_sell[has_exit]
            held_ns = sell_ts[pos] - fired_at[has_exit]
            in_window = (held_ns > 0) & (held_ns <= hold_limit_ns)
            if in_window.any():
                rows = rows[in_window]
                exit_price = sell_px[pos[in_window]]
                entry_price = c0[rows]
                ret[rows] = (exit_price / entry_price - 1.0) * 100.0 - fee_pct
                valid[rows] = True
                hold_days[rows] = held_ns[in_window] / _NS_PER_DAY

    # --- Sell fires: close the leg opened by the previous GT buy. ---
    sell_mask = side == "sell"
    if sell_mask.any() and len(buy_ts) > 0:
        sell_rows = np.flatnonzero(sell_mask)
        fired_at = fire_ts_ns[sell_rows]
        # side="left" minus one -> last GT buy strictly earlier than the fire.
        prev_buy = np.searchsorted(buy_ts, fired_at, side="left") - 1
        has_entry = prev_buy >= 0
        if has_entry.any():
            rows = sell_rows[has_entry]
            pos = prev_buy[has_entry]
            held_ns = fired_at[has_entry] - buy_ts[pos]
            in_window = (held_ns > 0) & (held_ns <= hold_limit_ns)
            if in_window.any():
                rows = rows[in_window]
                entry_price = buy_px[pos[in_window]]
                exit_price = c0[rows]
                ret[rows] = (exit_price / entry_price - 1.0) * 100.0 - fee_pct
                valid[rows] = True
                hold_days[rows] = held_ns[in_window] / _NS_PER_DAY

    return ret, valid, hold_days
def label_fire_outcomes(
    fires: pd.DataFrame,
    frames: dict[int, pd.DataFrame] | None = None,
) -> pd.DataFrame:
    """
    Label every rule fire with a realised return.

    When ``MATCH_LABEL_MODE == "leg_gt"`` fires are first labelled against
    GT trade events (next GT sell for buys, previous GT buy for sells).
    Every fire still lacking a finite return afterwards falls back to a
    fixed ``MATCH_FORWARD_BARS``-bar forward return on the
    ``MATCH_PRIMARY_INTERVAL`` close series. Fires that cannot be labelled
    either way are dropped from the result.

    Args:
        fires: Rule fires; the columns ``dt``, ``close`` and ``side`` are
            read.
        frames: OHLCV frames keyed by interval, used only for the forward
            fallback. Loaded from the DB when ``None`` and a fallback is
            needed.

    Returns:
        A copy of the labelled fires with the added columns
        ``forward_ret_pct``, ``win``, ``label_method``, ``hold_days`` and
        ``forward_bars``, re-indexed from 0. An empty ``fires`` is returned
        as a plain copy without these columns.
    """
    if fires.empty:
        # NOTE(review): the empty result carries no outcome columns; callers
        # that index them unconditionally should be checked.
        return fires.copy()

    fee_pct = TRADING_FEE_RATE * 2 * 100  # round trip, in percentage points
    fts = pd.to_datetime(fires["dt"])
    fire_ts_ns = fts.values.astype("datetime64[ns]").astype(np.int64)
    c0 = fires["close"].astype(float).values
    side = fires["side"].astype(str).values

    n_fires = len(fires)
    label_method = np.full(n_fires, "", dtype=object)
    hold_days = np.full(n_fires, np.nan, dtype=float)
    ret = np.full(n_fires, np.nan, dtype=float)

    if MATCH_LABEL_MODE == "leg_gt":
        # GT events are only needed in this mode, so load them here rather
        # than unconditionally.
        buy_ts, buy_px, sell_ts, sell_px = load_gt_trade_events()
        ret, valid, hold_days = _leg_gt_ret_vectorized(
            fire_ts_ns,
            c0,
            side,
            buy_ts,
            buy_px,
            sell_ts,
            sell_px,
            MATCH_MAX_HOLD_DAYS,
            fee_pct,
        )
        label_method[valid] = "leg_gt"

    need_fb = ~np.isfinite(ret)
    if need_fb.any():
        if frames is None:
            mon = Monitor(cooldown_file=None)
            frames = load_frames_from_db(mon, SYMBOL, lookback_days=CHART_LOOKBACK_DAYS)
        raw = frames.get(MATCH_PRIMARY_INTERVAL) if frames is not None else None
        if raw is not None and not raw.empty:
            col = "close" if "close" in raw.columns else "Close"
            # Only the close column is needed; astype() yields a new object,
            # so re-indexing it below does not touch the caller's frame.
            close = raw[col].astype(float)
            if not isinstance(close.index, pd.DatetimeIndex):
                close.index = pd.to_datetime(close.index)
            close = close.sort_index()
            close_px = close.values
            # Normalise to datetime64[ns] before the int64 cast so the unit
            # always matches fire_ts_ns, even for a non-nanosecond index.
            close_ts_ns = close.index.values.astype("datetime64[ns]").astype(np.int64)
            fb_ret, fb_ok = _forward_ret_vectorized(
                fire_ts_ns[need_fb],
                c0[need_fb],
                close_ts_ns,
                close_px,
                side[need_fb],
                MATCH_FORWARD_BARS,
                fee_pct,
            )
            ret[need_fb] = np.where(fb_ok, fb_ret, np.nan)
            fb_idx = np.where(need_fb)[0][fb_ok]
            label_method[fb_idx] = f"forward_{MATCH_FORWARD_BARS}"

    out = fires.copy()
    out["forward_ret_pct"] = np.round(ret, 4)
    out["win"] = (ret > 0).astype(int)
    out["label_method"] = label_method
    out["hold_days"] = np.round(hold_days, 2)
    out["forward_bars"] = MATCH_FORWARD_BARS
    # Drop fires that neither labelling method could resolve.
    out = out[np.isfinite(out["forward_ret_pct"])].reset_index(drop=True)

    leg_n = int((out["label_method"] == "leg_gt").sum())
    fb_n = int(out["label_method"].astype(str).str.startswith("forward").sum())
    print(
        f"[04-3] 성과 라벨: {len(out):,}"
        f"(mode={MATCH_LABEL_MODE}, leg_gt={leg_n:,}, forward폴백={fb_n:,}, "
        f"수수료 {fee_pct:.3f}%p)"
    )
    return out