# Bithumb/deepcoin/ground_truth/ground_truth.py

"""
차트 조회 구간(기본 1년) 3분봉에서 최적 매수·매도 타점(정답 라벨)을 생성합니다.

방법:
  1) ZigZag 피벗(스윙 고저) 추출
  2) split_buy_peak_sell: 저점 분할 매수 + 고점 1~2회 매도 (비중=삼각형 크기)
  3) ground_truth_trades.json 저장

실행:
  python scripts/02_ground_truth.py
  python scripts/05_chart_truth.py
"""

from __future__ import annotations

import json
from dataclasses import asdict, dataclass
from pathlib import Path
from typing import Any

import numpy as np
import pandas as pd

from config import (
    CHART_LOOKBACK_DAYS,
    ENTRY_INTERVAL,
    GROUND_TRUTH_FILE,
    GT_BUY_BB_MAX,
    GT_BUY_MIN_BARS,
    GT_BUY_MIN_SWING_PCT,
    GT_INITIAL_CASH_KRW,
    GT_MAX_BUYS_PER_LEG,
    GT_MAX_ROUND_TRIPS,
    TRADING_FEE_RATE,
    GT_MAX_SELLS_PER_LEG,
    GT_MIN_BARS_BETWEEN,
    GT_MIN_LEG_PCT,
    GT_MIN_SWING_PCT,
    GT_PIVOT_ORDER,
    GT_SELECTION_MODE,
    GT_SELL_SPLIT_GAP_PCT,
    SYMBOL,
)
from deepcoin.common.indicators import apply_bar_indicators, get_trend
from deepcoin.data.mtf_bb import load_frames_from_db

from deepcoin.paths import resolve_ground_truth_file

# Default path of the ground-truth JSON; used as the default `path`/`output`
# argument of save_ground_truth, load_ground_truth and run_from_db.
DEFAULT_OUTPUT = resolve_ground_truth_file()


@dataclass
class Pivot:
    """A single swing pivot (one point of the ZigZag)."""

    idx: int  # integer bar position inside the source DataFrame
    ts: pd.Timestamp  # index label (timestamp) of that bar
    kind: str  # "trough" | "peak"
    price: float  # pivot price (Low for troughs, High for peaks in build_zigzag_pivots)


@dataclass
class TradePoint:
    """One ground-truth trade point (a labelled buy or sell)."""

    dt: str  # timestamp string, written as "%Y-%m-%d %H:%M:%S" by the builders
    action: str  # "buy" or "sell"
    price: float  # execution price of the label
    memo: str  # human-readable explanation of the label
    weight: float = 1.0  # fraction of the leg budget (buy) or leg holdings (sell)
    leg_id: int = 0  # groups the buys and sells that belong to one leg
    bb_pos: float | None = None  # value of the "bb_pos" column at the bar, if present
    rsi: float | None = None  # value of the "RSI" column at the bar, if present
    pivot_kind: str = ""  # "trough" or "peak"
    forward_return_pct: float | None = None  # return (%) attached to the label, if computed


def _local_extrema_indices(arr: np.ndarray, order: int, kind: str) -> np.ndarray:
    """
    order 양옆 구간에서 국소 최소/최대 인덱스를 반환합니다.

    Args:
        arr: 가격 배열.
        order: 좌우 봉 수.
        kind: "min" 또는 "max".

    Returns:
        인덱스 ndarray.
    """
    n = len(arr)
    if n < 2 * order + 1:
        return np.array([], dtype=int)
    out: list[int] = []
    for i in range(order, n - order):
        window = arr[i - order : i + order + 1]
        if kind == "min" and arr[i] <= window.min():
            out.append(i)
        elif kind == "max" and arr[i] >= window.max():
            out.append(i)
    return np.array(out, dtype=int)


def build_zigzag_pivots(
    df: pd.DataFrame,
    min_swing_pct: float = GT_MIN_SWING_PCT,
    pivot_order: int = GT_PIVOT_ORDER,
) -> list[Pivot]:
    """
    Build swing troughs and peaks with a ZigZag-style filter.

    Local minima of ``Low`` and local maxima of ``High`` are merged into one
    candidate list ordered by bar position. A candidate of the same kind as
    the latest pivot replaces it only when it is more extreme; a candidate of
    the opposite kind is accepted only when it moved at least
    ``min_swing_pct`` percent away from the latest pivot.

    Args:
        df: OHLCV frame (index = datetime) with "Low" and "High" columns.
        min_swing_pct: Minimum price move (%) needed to confirm a new pivot.
        pivot_order: Search radius (bars) for local extrema.

    Returns:
        Pivots in chronological order.
    """
    lows = df["Low"].astype(float).values
    highs = df["High"].astype(float).values
    stamps = df.index
    threshold = min_swing_pct / 100.0

    trough_positions = _local_extrema_indices(lows, pivot_order, "min")
    peak_positions = _local_extrema_indices(highs, pivot_order, "max")

    # Troughs are listed first so that the stable sort keeps a trough ahead of
    # a peak found on the same bar.
    candidates: list[tuple[int, str, float]] = [
        (int(pos), "trough", float(lows[pos])) for pos in trough_positions
    ]
    candidates += [(int(pos), "peak", float(highs[pos])) for pos in peak_positions]
    candidates.sort(key=lambda cand: cand[0])

    pivots: list[Pivot] = []
    for bar, kind, price in candidates:
        if not pivots:
            pivots.append(Pivot(bar, stamps[bar], kind, price))
            continue

        anchor = pivots[-1]
        if kind == anchor.kind:
            # Same direction: keep only the more extreme of the two.
            lower_trough = kind == "trough" and price < anchor.price
            higher_peak = kind == "peak" and price > anchor.price
            if lower_trough or higher_peak:
                pivots[-1] = Pivot(bar, stamps[bar], kind, price)
            continue

        swing = abs(price - anchor.price) / max(anchor.price, 1e-9)
        if swing >= threshold:
            pivots.append(Pivot(bar, stamps[bar], kind, price))

    return pivots


def _select_optimal_chain(
    pivots: list[Pivot],
    min_bars: int = GT_MIN_BARS_BETWEEN,
    max_round_trips: int = GT_MAX_ROUND_TRIPS,
    mode: str = GT_SELECTION_MODE,
) -> list[Pivot]:
    """
    Pick the ground-truth buy/sell chain out of the ZigZag pivots.

    Modes:
      * "zigzag": spacing/alternation filter only (_filter_alternating_pivots).
      * "major_swings": delegate to _select_major_swings with GT_MIN_LEG_PCT.
      * anything else ("max_profit"): choose at most ``max_round_trips``
        non-overlapping trough -> peak intervals whose summed return (%) is
        maximal.

    The max-profit search is a weighted-interval-scheduling DP with a count
    limit. The earlier recurrence never carried the "skip this interval"
    value forward, so it always took every interval reached while walking
    back from the best end point and was not optimal. The cost is
    O(max_round_trips * number_of_candidate_intervals).

    Args:
        pivots: ZigZag pivots in chronological order.
        min_bars: Minimum number of bars between a buy and its sell.
        max_round_trips: Maximum number of round trips.
        mode: "zigzag", "major_swings" or "max_profit".

    Returns:
        Selected pivots as [buy, sell, buy, sell, ...] in chronological order.
    """
    from bisect import bisect_left

    if len(pivots) < 2:
        return []

    if mode == "zigzag":
        return _filter_alternating_pivots(pivots, min_bars, max_round_trips * 2)

    if mode == "major_swings":
        return _select_major_swings(
            pivots,
            min_bars=min_bars,
            max_round_trips=max_round_trips,
            min_leg_pct=GT_MIN_LEG_PCT,
        )

    # Every profitable trough -> later peak pair that respects min_bars.
    intervals: list[tuple[int, int, float, Pivot, Pivot]] = []
    for i, buy_p in enumerate(pivots):
        if buy_p.kind != "trough":
            continue
        for j in range(i + 1, len(pivots)):
            sell_p = pivots[j]
            if sell_p.kind != "peak":
                continue
            if sell_p.idx - buy_p.idx < min_bars:
                continue
            if sell_p.price <= buy_p.price:
                continue
            profit = (sell_p.price - buy_p.price) / max(buy_p.price, 1e-9) * 100.0
            intervals.append((buy_p.idx, sell_p.idx, profit, buy_p, sell_p))

    if not intervals:
        return _filter_alternating_pivots(pivots, min_bars, max_round_trips * 2)

    intervals.sort(key=lambda iv: iv[1])
    m = len(intervals)
    cap = min(max_round_trips, m)
    if cap <= 0:
        return []

    sell_bars = [iv[1] for iv in intervals]
    # compatible[k]: how many of the first k intervals finish strictly before
    # interval k starts (they form a prefix because of the sell-bar ordering).
    compatible = [
        bisect_left(sell_bars, iv[0], 0, k) for k, iv in enumerate(intervals)
    ]

    # prev_row[k]: best profit using the first k intervals with one interval
    # fewer allowed than the row currently being filled.
    prev_row = [0.0] * (m + 1)
    taken: list[bytearray] = []
    for _ in range(cap):
        row = [0.0] * (m + 1)
        flags = bytearray(m + 1)
        for k in range(1, m + 1):
            skip = row[k - 1]
            take = prev_row[compatible[k - 1]] + intervals[k - 1][2]
            if take > skip:
                row[k] = take
                flags[k] = 1
            else:
                row[k] = skip
        taken.append(flags)
        prev_row = row

    # Walk back from the full problem to recover the chosen intervals.
    chain_iv: list[tuple[int, int, float, Pivot, Pivot]] = []
    level, k = cap, m
    while level > 0 and k > 0:
        if taken[level - 1][k]:
            chain_iv.append(intervals[k - 1])
            k = compatible[k - 1]
            level -= 1
        else:
            k -= 1
    chain_iv.reverse()

    result: list[Pivot] = []
    for _, _, _, bp, sp in chain_iv:
        result.extend([bp, sp])
    return result


def _select_major_swings(
    pivots: list[Pivot],
    min_bars: int,
    max_round_trips: int,
    min_leg_pct: float,
) -> list[Pivot]:
    """
    Keep only the trough -> peak legs of the alternating ZigZag chain whose
    return is at least ``min_leg_pct`` percent.

    Behaviour:
      * No leg qualifies: retry with a relaxed threshold
        ``max(min_leg_pct * 0.6, 3.0)``. The retry stops (returning ``[]``)
        once relaxing no longer lowers the threshold; previously this
        recursed without bound.
      * At most ``max_round_trips`` legs qualify: all of them are returned.
      * More legs than the cap and CHART_LOOKBACK_DAYS >= 300: every
        non-overlapping leg is kept in time order, so the cap cannot drop
        the later months.
      * Otherwise: the non-overlapping subset of at most ``max_round_trips``
        legs with the largest summed return is chosen. The earlier code
        effectively kept the latest legs instead.

    Args:
        pivots: ZigZag pivots in chronological order.
        min_bars: Minimum number of bars between consecutive chain points.
        max_round_trips: Maximum number of round trips.
        min_leg_pct: Minimum return (%) of a single leg.

    Returns:
        Selected pivots as [buy, sell, ...] in chronological order.
    """
    from bisect import bisect_left

    chain = _filter_alternating_pivots(pivots, min_bars, len(pivots))
    if len(chain) < 2:
        return chain

    # (profit %, buy pivot, sell pivot, buy bar, sell bar)
    legs: list[tuple[float, Pivot, Pivot, int, int]] = []
    i = 0
    while i < len(chain) - 1:
        buy_p = chain[i]
        sell_p = chain[i + 1]
        if buy_p.kind == "trough" and sell_p.kind == "peak":
            profit = (sell_p.price - buy_p.price) / max(buy_p.price, 1e-9) * 100.0
            if profit >= min_leg_pct:
                legs.append((profit, buy_p, sell_p, buy_p.idx, sell_p.idx))
            i += 2
        else:
            i += 1

    if not legs:
        # Relax the threshold and retry, but only while that actually lowers
        # it; otherwise the recursion would never terminate.
        relaxed = max(min_leg_pct * 0.6, 3.0)
        if not relaxed < min_leg_pct:
            return []
        return _select_major_swings(
            pivots,
            min_bars,
            max_round_trips,
            min_leg_pct=relaxed,
        )

    if len(legs) <= max_round_trips:
        out: list[Pivot] = []
        for _, bp, sp, _, _ in legs:
            out.extend([bp, sp])
        return out

    # One-year labels: use every non-overlapping leg in time order so the cap
    # does not drop the later months.
    if CHART_LOOKBACK_DAYS >= 300:
        kept_legs: list[tuple[float, Pivot, Pivot, int, int]] = []
        last_sell_bar = -1
        for pr, bp, sp, lb, sb in sorted(legs, key=lambda x: x[3]):
            if lb > last_sell_bar:
                kept_legs.append((pr, bp, sp, lb, sb))
                last_sell_bar = sb
        result: list[Pivot] = []
        for _pr, bp, sp, _lb, _sb in kept_legs:
            result.extend([bp, sp])
        return result

    # Count-limited weighted interval scheduling over (buy bar, sell bar).
    intervals = [(lb, sb, pr, bp, sp) for pr, bp, sp, lb, sb in legs]
    intervals.sort(key=lambda x: x[1])
    m = len(intervals)
    cap = min(max_round_trips, m)
    if cap <= 0:
        return []

    sell_bars = [iv[1] for iv in intervals]
    # compatible[k]: how many of the first k intervals end strictly before
    # interval k begins.
    compatible = [
        bisect_left(sell_bars, iv[0], 0, k) for k, iv in enumerate(intervals)
    ]

    prev_row = [0.0] * (m + 1)
    taken: list[bytearray] = []
    for _ in range(cap):
        row = [0.0] * (m + 1)
        flags = bytearray(m + 1)
        for k in range(1, m + 1):
            skip = row[k - 1]
            take = prev_row[compatible[k - 1]] + intervals[k - 1][2]
            if take > skip:
                row[k] = take
                flags[k] = 1
            else:
                row[k] = skip
        taken.append(flags)
        prev_row = row

    chosen: list[tuple[int, int, float, Pivot, Pivot]] = []
    level, k = cap, m
    while level > 0 and k > 0:
        if taken[level - 1][k]:
            chosen.append(intervals[k - 1])
            k = compatible[k - 1]
            level -= 1
        else:
            k -= 1
    chosen.reverse()

    selected: list[Pivot] = []
    for _lb, _sb, _pr, bp, sp in chosen:
        selected.extend([bp, sp])
    return selected


def _filter_alternating_pivots(
    pivots: list[Pivot],
    min_bars: int,
    max_points: int,
) -> list[Pivot]:
    """ZigZag 피벗을 간격·교대 규칙으로만 줄입니다."""
    filtered: list[Pivot] = []
    for p in pivots:
        if filtered and p.idx - filtered[-1].idx < min_bars:
            continue
        if filtered and p.kind == filtered[-1].kind:
            if p.kind == "trough" and p.price < filtered[-1].price:
                filtered[-1] = p
            elif p.kind == "peak" and p.price > filtered[-1].price:
                filtered[-1] = p
            continue
        filtered.append(p)
    if filtered and filtered[0].kind == "peak":
        filtered = filtered[1:]
    if filtered and filtered[-1].kind == "trough":
        filtered = filtered[:-1]
    return filtered[:max_points]


def _bb_context(row: pd.Series) -> tuple[float | None, float | None, float | None]:
    """
    Read the indicator context of one bar.

    Returns:
        (bb_pos rounded to 3 decimals, RSI rounded to 1 decimal, disparity
        rounded to 1 decimal). The disparity period is 20 when that period is
        configured in DISPARITY_PERIODS, otherwise the first configured one.
        Each element is None when its column is absent or NaN.
    """
    from config import DISPARITY_PERIODS
    from deepcoin.common.indicators import disparity_column

    def rounded(column: str, digits: int) -> float | None:
        if column in row.index and pd.notna(row[column]):
            return round(float(row[column]), digits)
        return None

    band_position = rounded("bb_pos", 3)
    rsi_value = rounded("RSI", 1)
    period = 20 if 20 in DISPARITY_PERIODS else DISPARITY_PERIODS[0]
    disparity_value = rounded(disparity_column(period), 1)
    return band_position, rsi_value, disparity_value


def _memo_for_trade(
    action: str,
    pivot: Pivot,
    bb_pos: float | None,
    rsi: float | None,
    disparity: float | None,
    forward_pct: float | None,
) -> str:
    """타점 해석 메모."""
    zone = "중단"
    if bb_pos is not None:
        if bb_pos < 0.25:
            zone = "밴드 하단"
        elif bb_pos > 0.75:
            zone = "밴드 상단"
    parts = [
        f"ZigZag {pivot.kind}",
        zone,
    ]
    if rsi is not None:
        parts.append(f"RSI {rsi}")
    if disparity is not None:
        parts.append(f"D.I.{disparity}")
    if forward_pct is not None and action == "buy":
        parts.append(f"다음 매도까지 +{forward_pct:.1f}%")
    elif forward_pct is not None and action == "sell":
        parts.append(f"직전 매수 대비 +{forward_pct:.1f}%")
    return " · ".join(parts)


def _bar_index(df: pd.DataFrame, ts: pd.Timestamp) -> int:
    """타임스탬프의 정수 봉 위치."""
    loc = df.index.get_loc(ts if ts in df.index else df.index[df.index.get_indexer([ts], method="nearest")[0]])
    if isinstance(loc, slice):
        return int(loc.start or 0)
    if hasattr(loc, "__len__") and not isinstance(loc, int):
        return int(loc[-1])
    return int(loc)


def _row_at_ts(df: pd.DataFrame, ts: pd.Timestamp) -> pd.Series:
    """타임스탬프에 해당하는 봉 1행."""
    loc = ts if ts in df.index else df.index[df.index.get_indexer([ts], method="nearest")[0]]
    row = df.loc[loc]
    if isinstance(row, pd.DataFrame):
        row = row.iloc[-1]
    return row


def _normalize_weights(scores: list[float]) -> list[float]:
    """비중 점수를 합 1로 정규화."""
    total = sum(scores)
    if total <= 0:
        n = len(scores)
        return [1.0 / n] * n if n else []
    return [s / total for s in scores]


def _collect_buy_troughs(
    df: pd.DataFrame,
    buy_pivots: list[Pivot],
    start: pd.Timestamp,
    end: pd.Timestamp,
    min_bars: int,
    max_buys: int = GT_MAX_BUYS_PER_LEG,
) -> list[Pivot]:
    """
    Gather the ZigZag troughs that lie strictly between ``start`` and ``end``.

    When at least one trough was found and the frame has a "bb_pos" column,
    one extra trough is added: the lowest ``Low`` among the bars of the
    window whose ``bb_pos`` is at most GT_BUY_BB_MAX, provided it is at least
    ``min_bars`` away from every trough already collected.

    Troughs closer than ``min_bars`` to the previously kept one are merged
    (the cheaper one wins). If more than ``max_buys`` remain, only the
    cheapest ``max_buys`` are kept, returned in chronological order.
    """
    found: list[Pivot] = [p for p in buy_pivots if start < p.ts < end]

    if found and "bb_pos" in df.columns:
        window = df[(df.index > start) & (df.index < end)]
        if not window.empty:
            near_lower_band = window[window["bb_pos"] <= GT_BUY_BB_MAX]
            if not near_lower_band.empty:
                low_ts = near_lower_band["Low"].astype(float).idxmin()
                low_bar = _bar_index(df, low_ts)
                too_close = any(abs(low_bar - p.idx) < min_bars for p in found)
                if not too_close:
                    low_price = float(near_lower_band.loc[low_ts, "Low"])
                    found.append(Pivot(low_bar, low_ts, "trough", low_price))

    found.sort(key=lambda p: p.ts)

    spaced: list[Pivot] = []
    for candidate in found:
        if not spaced or candidate.idx - spaced[-1].idx >= min_bars:
            spaced.append(candidate)
        elif candidate.price < spaced[-1].price:
            # Too close to the previous trough: keep the cheaper of the two.
            spaced[-1] = candidate

    if len(spaced) > max_buys:
        # Keep the max_buys lowest-priced troughs, then restore time order.
        cheapest = sorted(spaced, key=lambda p: p.price)[:max_buys]
        spaced = sorted(cheapest, key=lambda p: p.ts)
    return spaced


def _peak_sell_points(
    df: pd.DataFrame,
    peak: Pivot,
    max_splits: int,
    split_gap_pct: float,
) -> list[tuple[Pivot, float]]:
    """
    Decide whether the position is sold once or in two parts around a peak.

    The main sell is the peak bar itself. When ``max_splits`` allows a split,
    the 80 bars starting at the peak are scanned for local highs (two bars on
    each side) whose price is within ``split_gap_pct`` percent of the main
    sell price; the highest of them becomes a second sell.

    Returns:
        List of (pivot, weight) pairs whose weights sum to 1.0: either a
        single 100% sell or a 65% / 35% split.
    """
    bar = _row_at_ts(df, peak.ts)
    anchor_price = float(bar["High"]) if "High" in bar else peak.price
    primary = Pivot(peak.idx, peak.ts, "peak", anchor_price)
    sell_everything = [(primary, 1.0)]

    if max_splits < 2:
        return sell_everything

    lookahead = df.iloc[peak.idx : peak.idx + 80]
    if len(lookahead) < 5:
        return sell_everything

    high_values = lookahead["High"].astype(float).values
    nearby_peaks: list[Pivot] = []
    for offset in range(2, len(lookahead) - 2):
        if high_values[offset] < high_values[offset - 2 : offset + 3].max():
            continue
        level = float(high_values[offset])
        gap_pct = abs(level - anchor_price) / max(anchor_price, 1e-9) * 100
        if gap_pct <= split_gap_pct:
            nearby_peaks.append(
                Pivot(peak.idx + offset, lookahead.index[offset], "peak", level)
            )

    if not nearby_peaks:
        return sell_everything

    runner_up = max(nearby_peaks, key=lambda p: p.price)
    if runner_up.ts == primary.ts:
        return sell_everything
    return [(primary, 0.65), (runner_up, 0.35)]


def build_split_buy_peak_sell_trades(
    df: pd.DataFrame,
    raw_pivots: list[Pivot],
    sell_peaks: list[Pivot],
    buy_min_bars: int = GT_BUY_MIN_BARS,
) -> list[TradePoint]:
    """
    Build ground-truth trades: split buys at troughs, one or two sells at each peak.

    For every sell peak (one "leg") the troughs between the previous sell and
    the peak become buys weighted inversely to their price, and the peak is
    turned into one or two sells by _peak_sell_points. Troughs after the last
    peak are emitted as buys of an extra, still-open leg.

    Args:
        df: Bar frame including indicator columns.
        raw_pivots: ZigZag pivots. NOTE(review): not referenced in the body;
            the buy troughs are recomputed with GT_BUY_MIN_SWING_PCT.
        sell_peaks: Peak pivots used as sell references (major swings).
        buy_min_bars: Minimum spacing (bars) between split buys.

    Returns:
        TradePoint list sorted by the ``dt`` string.
    """
    # Buy candidates use their own swing threshold, independent of raw_pivots.
    buy_pivots = build_zigzag_pivots(
        df, min_swing_pct=GT_BUY_MIN_SWING_PCT, pivot_order=GT_PIVOT_ORDER
    )
    buy_pivots = [p for p in buy_pivots if p.kind == "trough"]

    sell_peaks = sorted(sell_peaks, key=lambda x: x.ts)
    trades: list[TradePoint] = []
    # Start of the window in which buys for the current leg are searched.
    prev_sell_ts = df.index[0]

    for leg_id, peak in enumerate(sell_peaks):
        troughs = _collect_buy_troughs(df, buy_pivots, prev_sell_ts, peak.ts, buy_min_bars)
        if troughs:
            # Cheaper troughs receive a larger share of the leg budget.
            scores = [1.0 / max(t.price, 1e-9) for t in troughs]
            weights = _normalize_weights(scores)
            for t, w in zip(troughs, weights):
                row = _row_at_ts(df, t.ts)
                bb_pos, rsi, disp = _bb_context(row)
                price = float(row["Low"]) if "Low" in row else t.price
                # Return from this buy up to the leg's peak price.
                pct = (peak.price - price) / max(price, 1e-9) * 100.0
                trades.append(
                    TradePoint(
                        dt=t.ts.strftime("%Y-%m-%d %H:%M:%S"),
                        action="buy",
                        price=round(price, 2),
                        weight=round(w, 3),
                        leg_id=leg_id,
                        memo=(
                            f"저점 분할 매수 · 비중 {w*100:.0f}% · {len(troughs)}회 "
                            f"· BB하단 · leg#{leg_id}"
                        ),
                        bb_pos=bb_pos,
                        rsi=rsi,
                        pivot_kind="trough",
                        forward_return_pct=round(pct, 2),
                    )
                )

        sell_pts = _peak_sell_points(
            df, peak, GT_MAX_SELLS_PER_LEG, GT_SELL_SPLIT_GAP_PCT
        )
        # Weight-averaged buy price of this leg; 0 when the leg has no buys.
        leg_avg = (
            sum(t.price * t.weight for t in trades if t.leg_id == leg_id and t.action == "buy")
            / max(
                sum(t.weight for t in trades if t.leg_id == leg_id and t.action == "buy"),
                1e-9,
            )
        )
        for sp, w in sell_pts:
            row = _row_at_ts(df, sp.ts)
            bb_pos, rsi, disp = _bb_context(row)
            price = float(row["High"]) if "High" in row else sp.price
            # No return is reported for a sell whose leg had no buys.
            ret = (price - leg_avg) / max(leg_avg, 1e-9) * 100.0 if leg_avg > 0 else None
            n_sell = len(sell_pts)
            trades.append(
                TradePoint(
                    dt=sp.ts.strftime("%Y-%m-%d %H:%M:%S"),
                    action="sell",
                    price=round(price, 2),
                    weight=round(w, 3),
                    leg_id=leg_id,
                    memo=(
                        f"고점 매도 · 비중 {w*100:.0f}% · "
                        f"{'분할' if n_sell > 1 else '1회'} · leg#{leg_id}"
                    ),
                    bb_pos=bb_pos,
                    rsi=rsi,
                    pivot_kind="peak",
                    forward_return_pct=round(ret, 2) if ret is not None else None,
                )
            )

        prev_sell_ts = peak.ts

    # From the last sell to the end of the period: split buys only (open position).
    if sell_peaks:
        last_peak = sell_peaks[-1]
        troughs = _collect_buy_troughs(
            df, buy_pivots, last_peak.ts, df.index[-1], buy_min_bars
        )
        leg_id = len(sell_peaks)
        if troughs:
            weights = _normalize_weights([1.0 / max(t.price, 1e-9) for t in troughs])
            for t, w in zip(troughs, weights):
                row = _row_at_ts(df, t.ts)
                bb_pos, rsi, disp = _bb_context(row)
                price = float(row["Low"]) if "Low" in row else t.price
                trades.append(
                    TradePoint(
                        dt=t.ts.strftime("%Y-%m-%d %H:%M:%S"),
                        action="buy",
                        price=round(price, 2),
                        weight=round(w, 3),
                        leg_id=leg_id,
                        memo=f"저점 분할 매수(미청산) · 비중 {w*100:.0f}%",
                        bb_pos=bb_pos,
                        rsi=rsi,
                        pivot_kind="trough",
                    )
                )

    trades.sort(key=lambda t: t.dt)
    return trades


def pivots_to_trades(
    pivots: list[Pivot],
    df: pd.DataFrame,
) -> list[TradePoint]:
    """
    Convert the selected pivots into ground-truth buy/sell trade points.

    Troughs become buys at the bar's Low and peaks become sells at the bar's
    High. Each buy and the sell that closes it share a ``leg_id``, and the
    counter advances after every sell. Previously every trade kept the
    default ``leg_id`` of 0, which made the round-trip count and the
    per-leg budgeting of the portfolio simulation wrong for this mode.

    Args:
        pivots: Selected pivots in chronological order.
        df: Bar frame including indicator columns.

    Returns:
        One TradePoint per pivot, in the order of ``pivots``.
    """
    trades: list[TradePoint] = []
    last_buy_price: float | None = None
    leg_id = 0

    for i, p in enumerate(pivots):
        row = _row_at_ts(df, p.ts)
        bb_pos, rsi, disp = _bb_context(row)
        forward_pct: float | None = None
        trade_leg = leg_id

        if p.kind == "trough":
            action = "buy"
            price = float(row["Low"]) if "Low" in row else p.price
            # Return up to the next pivot when that pivot is a peak.
            if i + 1 < len(pivots) and pivots[i + 1].kind == "peak":
                forward_pct = (pivots[i + 1].price - price) / max(price, 1e-9) * 100.0
            last_buy_price = price
        else:
            action = "sell"
            price = float(row["High"]) if "High" in row else p.price
            # Return relative to the most recent buy, if there is one.
            if last_buy_price:
                forward_pct = (price - last_buy_price) / max(last_buy_price, 1e-9) * 100.0
            last_buy_price = None
            # A sell closes the current leg; the next buy opens a new one.
            leg_id += 1

        trades.append(
            TradePoint(
                dt=p.ts.strftime("%Y-%m-%d %H:%M:%S"),
                action=action,
                price=round(price, 2),
                weight=1.0,
                leg_id=trade_leg,
                memo=_memo_for_trade(action, p, bb_pos, rsi, disp, forward_pct),
                bb_pos=bb_pos,
                rsi=rsi,
                pivot_kind=p.kind,
                forward_return_pct=round(forward_pct, 2) if forward_pct is not None else None,
            )
        )

    return trades


def generate_ground_truth(
    df_3m: pd.DataFrame,
    df_1d: pd.DataFrame | None = None,
    df_1h: pd.DataFrame | None = None,
    min_swing_pct: float = GT_MIN_SWING_PCT,
    pivot_order: int = GT_PIVOT_ORDER,
    min_bars: int = GT_MIN_BARS_BETWEEN,
    max_round_trips: int = GT_MAX_ROUND_TRIPS,
    selection_mode: str = GT_SELECTION_MODE,
) -> dict[str, Any]:
    """
    Build the ground-truth JSON structure from the entry-interval bars.

    Args:
        df_3m: Entry-interval OHLCV frame.
        df_1d: Daily frame (only used for the trend label, optional).
        df_1h: Hourly frame (only used for the trend label, optional).
        min_swing_pct: Minimum ZigZag swing (%).
        pivot_order: Radius (bars) for the local-extremum search.
        min_bars: Minimum number of bars between chain points.
        max_round_trips: Maximum number of round trips.
        selection_mode: "split_buy_peak_sell" selects major-swing peaks and
            builds split buys around them; any other value is passed to
            _select_optimal_chain as its mode.

    Returns:
        Dict to be stored as the ground-truth JSON (metadata, params, summary
        including the portfolio simulation, and the trade list).

    Raises:
        ValueError: If the frame is empty after indicators are applied.
    """
    df = apply_bar_indicators(df_3m.sort_index().copy())
    if df.empty:
        raise ValueError("3분봉 데이터가 비어 있습니다.")

    raw_pivots = build_zigzag_pivots(df, min_swing_pct=min_swing_pct, pivot_order=pivot_order)

    if selection_mode == "split_buy_peak_sell":
        # Only the peaks of the major-swing chain are used; buys are rebuilt
        # from troughs inside build_split_buy_peak_sell_trades.
        selected = _select_optimal_chain(
            raw_pivots,
            min_bars=min_bars,
            max_round_trips=max_round_trips,
            mode="major_swings",
        )
        sell_peaks = [p for p in selected if p.kind == "peak"]
        trades = build_split_buy_peak_sell_trades(df, raw_pivots, sell_peaks)
        method = "split_buy_at_troughs + peak_sell_1or2"
    else:
        selected = _select_optimal_chain(
            raw_pivots,
            min_bars=min_bars,
            max_round_trips=max_round_trips,
            mode=selection_mode,
        )
        trades = pivots_to_trades(selected, df)
        # NOTE(review): this label is fixed for every non-split mode.
        method = "zigzag_pivot + max_profit_chain"

    # The trend label stays "range" unless both higher-timeframe frames are given.
    trend = "range"
    if df_1d is not None and df_1h is not None:
        trend = get_trend(df_1d, df_1h)

    # Round trips are counted as distinct leg ids among the sells.
    round_trips = len({t.leg_id for t in trades if t.action == "sell"})
    buy_count = sum(1 for t in trades if t.action == "buy")
    sell_count = sum(1 for t in trades if t.action == "sell")
    total_ret = sum(
        t.forward_return_pct or 0.0 for t in trades if t.action == "sell"
    )

    trades.sort(key=lambda t: t.dt)
    # Open positions are marked to the last close of the period.
    last_close = float(df["Close"].iloc[-1])
    pnl = simulate_truth_portfolio(
        [asdict(t) for t in trades],
        initial_cash=GT_INITIAL_CASH_KRW,
        fee_rate=TRADING_FEE_RATE,
        last_price=last_close,
    )

    return {
        "name": "ground_truth_split_buy_peak_sell",
        "method": method,
        "symbol": SYMBOL,
        "interval_min": ENTRY_INTERVAL,
        "lookback_days": CHART_LOOKBACK_DAYS,
        "period_start": str(df.index[0]),
        "period_end": str(df.index[-1]),
        "trend_at_end": trend,
        "params": {
            "min_swing_pct": min_swing_pct,
            "pivot_order": pivot_order,
            "min_bars_between": min_bars,
            "max_round_trips": max_round_trips,
            "selection_mode": selection_mode,
            "buy_min_swing_pct": GT_BUY_MIN_SWING_PCT,
            "buy_bb_max": GT_BUY_BB_MAX,
            "max_sells_per_leg": GT_MAX_SELLS_PER_LEG,
        },
        "summary": {
            "pivot_candidates": len(raw_pivots),
            "sell_peaks": len([p for p in selected if p.kind == "peak"]) if selected else 0,
            "trade_count": len(trades),
            "buy_count": buy_count,
            "sell_count": sell_count,
            "round_trips": round_trips,
            "sum_sell_leg_return_pct": round(total_ret, 2),
            # Portfolio simulation figures are merged into the summary.
            **pnl,
        },
        "note": (
            "저점 분할 매수(삼각형 크기=비중), 고점 1~2회 매도. "
            "사후 라벨·캘리브레이션용."
        ),
        "trades": [asdict(t) for t in trades],
    }


def _truth_simulation_rows(trades: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """TradePoint/dict 리스트를 시간순 dict 행으로 정규화."""
    return sorted(
        [t if isinstance(t, dict) else asdict(t) for t in trades],
        key=lambda x: x["dt"],
    )


def simulate_truth_portfolio_steps(
    trades: list[dict[str, Any]] | list[TradePoint],
    initial_cash: float = GT_INITIAL_CASH_KRW,
    fee_rate: float = TRADING_FEE_RATE,
) -> list[dict[str, Any]]:
    """
    Replay the trades and return a portfolio snapshot after each of them.

    The first buy of a leg fixes the leg budget to the cash available at that
    moment, and every buy of the leg spends ``budget * weight`` (capped by the
    remaining cash including the fee). The first sell of a leg fixes the base
    quantity, and every sell of the leg sells ``base * weight``. The total
    asset value of a snapshot is cash plus holdings valued at that trade's
    price.

    Args:
        trades: JSON trade dicts or TradePoint objects.
        initial_cash: Starting cash.
        fee_rate: Fee rate applied to both buys and sells.

    Returns:
        Snapshot dicts in trade-time order (total_asset_krw, cash_krw,
        holding_qty, ...). A buy whose amount is not positive and a sell whose
        quantity is not positive produce no snapshot.
    """
    ordered = _truth_simulation_rows(trades)
    cash = float(initial_cash)
    holding = 0.0
    budget = 0.0
    buy_leg: int | None = None
    sell_leg: int | None = None
    sell_basis = 0.0
    snapshots: list[dict[str, Any]] = []

    for trade in ordered:
        action = trade["action"]
        price = float(trade["price"])
        weight = float(trade.get("weight", 1.0))
        leg_id = int(trade.get("leg_id", 0))

        if action == "buy":
            if buy_leg != leg_id:
                # A new leg starts: budget it with all the cash on hand.
                buy_leg = leg_id
                budget = cash
                sell_leg = None
            notional = budget * weight
            if notional <= 0:
                continue
            fee = notional * fee_rate
            outlay = notional + fee
            if outlay > cash:
                # Not enough cash: shrink the order so that order + fee fits.
                notional = max(cash / (1.0 + fee_rate), 0.0)
                fee = notional * fee_rate
                outlay = notional + fee
            cash -= outlay
            if price > 0:
                holding += notional / price

        elif action == "sell" and holding > 0:
            if sell_leg != leg_id:
                # First sell of the leg: split weights refer to this quantity.
                sell_leg = leg_id
                sell_basis = holding
            lot = min(sell_basis * weight, holding)
            if lot <= 0:
                continue
            proceeds = lot * price
            fee = proceeds * fee_rate
            cash += proceeds - fee
            holding -= lot
            if holding < 1e-12:
                holding = 0.0

        total_asset = cash + holding * price
        snapshots.append(
            {
                "dt": trade["dt"],
                "action": action,
                "price": price,
                "weight": weight,
                "leg_id": leg_id,
                "cash_krw": round(cash, 0),
                "holding_qty": round(holding, 4),
                "total_asset_krw": round(total_asset, 0),
            }
        )

    return snapshots


def simulate_truth_portfolio(
    trades: list[dict[str, Any]] | list[TradePoint],
    initial_cash: float = GT_INITIAL_CASH_KRW,
    fee_rate: float = TRADING_FEE_RATE,
    last_price: float | None = None,
) -> dict[str, Any]:
    """
    Replay the split buys and sells in time order and report the profit.

    - Initial valuation = ``initial_cash`` (all cash, nothing held).
    - Each buy/sell updates cash and holdings, honouring its ``weight``.
    - Final valuation = cash + holdings x mark price.
    - Profit = final valuation - initial valuation.
    - Return (%) = profit / initial valuation x 100.

    Split sells: the weights of one leg refer to the quantity held at the
    first sell of that leg, so 0.65 + 0.35 sells everything instead of being
    applied successively to the remaining quantity.

    Args:
        trades: JSON trade dicts or TradePoint objects.
        initial_cash: Starting cash.
        fee_rate: Fee rate applied to each buy and each sell.
        last_price: Price used to value an open position. When None, the
            price of the last trade is used (0 if there are no trades).

    Returns:
        Dict with initial_cash_krw, final_asset_krw, pnl_krw, pnl_pct,
        total_fees_krw, cash_krw, holding_qty, holding_value_krw, mark_price
        and fee_rate.
    """
    ordered = _truth_simulation_rows(trades)
    cash = float(initial_cash)
    holding = 0.0
    fees_paid = 0.0
    budget = 0.0
    buy_leg: int | None = None
    sell_leg: int | None = None
    sell_basis = 0.0
    last_seen_price: float | None = None

    for trade in ordered:
        action = trade["action"]
        price = float(trade["price"])
        weight = float(trade.get("weight", 1.0))
        leg_id = int(trade.get("leg_id", 0))
        last_seen_price = price

        if action == "buy":
            if buy_leg != leg_id:
                # A new leg starts: budget it with all the cash on hand.
                buy_leg = leg_id
                budget = cash
                sell_leg = None
            notional = budget * weight
            if notional <= 0:
                continue
            fee = notional * fee_rate
            outlay = notional + fee
            if outlay > cash:
                # Not enough cash: shrink the order so that order + fee fits.
                notional = max(cash / (1.0 + fee_rate), 0.0)
                fee = notional * fee_rate
                outlay = notional + fee
            cash -= outlay
            fees_paid += fee
            if price > 0:
                holding += notional / price

        elif action == "sell" and holding > 0:
            if sell_leg != leg_id:
                # First sell of the leg: split weights refer to this quantity.
                sell_leg = leg_id
                sell_basis = holding
            lot = min(sell_basis * weight, holding)
            if lot <= 0:
                continue
            proceeds = lot * price
            fee = proceeds * fee_rate
            cash += proceeds - fee
            fees_paid += fee
            holding -= lot
            if holding < 1e-12:
                holding = 0.0

    if last_price is not None:
        mark_price = float(last_price)
    else:
        mark_price = float(last_seen_price or 0)
    holding_value = holding * mark_price
    final_asset = cash + holding_value
    pnl_krw = final_asset - initial_cash
    pnl_pct = pnl_krw / initial_cash * 100.0 if initial_cash else 0.0

    return {
        "initial_cash_krw": round(initial_cash, 0),
        "final_asset_krw": round(final_asset, 0),
        "pnl_krw": round(pnl_krw, 0),
        "pnl_pct": round(pnl_pct, 2),
        "total_fees_krw": round(fees_paid, 0),
        "cash_krw": round(cash, 0),
        "holding_qty": round(holding, 6),
        "holding_value_krw": round(holding_value, 0),
        "mark_price": round(mark_price, 2),
        "fee_rate": fee_rate,
    }


def save_ground_truth(data: dict[str, Any], path: Path = DEFAULT_OUTPUT) -> Path:
    """Write the ground-truth dict to ``path`` as UTF-8 JSON and return the path."""
    serialized = json.dumps(data, ensure_ascii=False, indent=2)
    path.write_text(serialized, encoding="utf-8")
    return path


def load_ground_truth(path: Path = DEFAULT_OUTPUT) -> dict[str, Any] | None:
    """Load the ground-truth JSON from ``path``; return None when the file is missing."""
    if path.exists():
        raw_text = path.read_text(encoding="utf-8")
        return json.loads(raw_text)
    return None


def _report_month_gaps(trades: list[dict[str, Any]]) -> list[str]:
    """거래가 없는 연속 월(YYYY-MM) 목록."""
    if not trades:
        return []
    from collections import Counter

    months = sorted({t["dt"][:7] for t in trades})
    gaps: list[str] = []
    y1, m1 = map(int, months[0].split("-"))
    for label in months[1:]:
        y2, m2 = map(int, label.split("-"))
        gap = (y2 - y1) * 12 + (m2 - m1)
        if gap > 1:
            gaps.append(f"{months[months.index(label) - 1]} → {label} ({gap - 1}개월 공백)")
        y1, m1 = y2, m2
    return gaps


def print_ground_truth_report(data: dict[str, Any]) -> None:
    """
    Print a terminal summary of a ground-truth dict.

    The report shows the period, pivot/buy/sell/leg counts, the portfolio
    simulation (when the summary contains it), the parameters, the number of
    trades per month, month gaps, and the trades themselves (only the first
    20 and last 10 when there are more than 40).

    Args:
        data: Dict with "summary", "trades", "params", "period_start" and
            "period_end" entries; missing entries are tolerated.
    """
    from collections import Counter

    s = data.get("summary", {})
    trades = data.get("trades") or []
    print(f"\n[정답 타점] {data.get('period_start')} ~ {data.get('period_end')}")
    print(
        f"  피벗 {s.get('pivot_candidates')} | 매수 {s.get('buy_count')} / 매도 {s.get('sell_count')} "
        f"| leg {s.get('round_trips')}"
    )
    print(f"  매도 수익 합(참고): {s.get('sum_sell_leg_return_pct')}%")
    if s.get("initial_cash_krw"):
        print(
            f"  시뮬(시작 ₩{s['initial_cash_krw']:,.0f}): "
            f"최종 ₩{s['final_asset_krw']:,.0f} | "
            f"수익 ₩{s['pnl_krw']:+,.0f} ({s['pnl_pct']:+.2f}%) | "
            f"수수료 ₩{s['total_fees_krw']:,.0f}"
        )
        # `or 0` keeps a stored null holding from breaking the comparison.
        if (s.get("holding_qty") or 0) > 0:
            print(
                f"  미청산: {s['holding_qty']}개 "
                f"(평가 ₩{s['holding_value_krw']:,.0f}, 종가 ₩{s['mark_price']:,.0f})"
            )
    print(f"  파라미터: {data.get('params')}")

    by_month = Counter(t["dt"][:7] for t in trades)
    print(f"  월별 타점: {', '.join(f'{m}({c})' for m, c in sorted(by_month.items()))}")
    gaps = _report_month_gaps(trades)
    if gaps:
        print(f"  경고 — 거래 공백 월: {'; '.join(gaps)}")
    else:
        print("  월별 공백: 없음 (연속 커버)")
    show = trades if len(trades) <= 40 else trades[:20] + trades[-10:]
    if len(trades) > 40:
        print(f"  (타점 {len(trades)}건 — 앞 20·뒤 10건만 표시)")
    for t in show:
        mark = "매수" if t["action"] == "buy" else "매도"
        w = float(t.get("weight", 1.0))
        ret = t.get("forward_return_pct")
        # The "+" format flag prints the real sign; a literal "+" prefix
        # showed losses as "+-x%".
        ret_s = f" ({ret:+}%)" if ret is not None else ""
        print(
            f"  [{mark}] {t['dt'][:16]} ₩{t['price']:,.0f} "
            f"비중{w*100:.0f}%{ret_s}  {t.get('memo', '')}"
        )


def run_from_db(monitor=None, output: Path = DEFAULT_OUTPUT) -> dict[str, Any]:
    """
    Load the CHART_LOOKBACK_DAYS window through load_frames_from_db, generate
    the ground truth, save it and print the report.

    Args:
        monitor: Monitor instance; a new one is created when it is falsy.
        output: Path the JSON is written to.

    Returns:
        The generated ground-truth dict.

    Raises:
        RuntimeError: If no entry-interval bars are available.
    """
    from config import TREND_INTERVAL_1D, TREND_INTERVAL_1H
    from monitor import Monitor

    mon = monitor or Monitor(cooldown_file=None)
    print(f"정답 생성: 최근 {CHART_LOOKBACK_DAYS}일 3분봉")
    frames = load_frames_from_db(mon, SYMBOL, lookback_days=CHART_LOOKBACK_DAYS)

    entry_df = frames.get(ENTRY_INTERVAL)
    if entry_df is None or entry_df.empty:
        raise RuntimeError("3분봉 없음. python scripts/01_download.py 실행 후 재시도.")

    def trend_frame(interval):
        # Fall back to the entry-interval bars when a trend frame is missing.
        frame = frames.get(interval)
        return entry_df if frame is None or frame.empty else frame

    daily_df = trend_frame(TREND_INTERVAL_1D)
    hourly_df = trend_frame(TREND_INTERVAL_1H)

    result = generate_ground_truth(entry_df, daily_df, hourly_df)
    save_ground_truth(result, output)
    print(f"저장: {output}")
    print_ground_truth_report(result)
    return result


def main() -> None:
    """CLI entry point: generate the ground-truth JSON via run_from_db()."""
    run_from_db()


# Run the generator when the file is executed as a script.
if __name__ == "__main__":
    main()