# Bithumb/deepcoin/matching/position_sizing.py

"""
총자산 대비 GT 모델 매수율(비중) · 보유 현금 한도 · leg tier 배분.
"""

from __future__ import annotations

import json
from datetime import datetime
from pathlib import Path
from typing import Any

from config import (
    GT_BUY_PCT_LARGE_LEG,
    GT_BUY_PCT_SMALL_LEG,
    GT_INITIAL_CASH_KRW,
    GT_LARGE_LEG_TOP_PCT,
    GT_MIN_ORDER_KRW,
    MATCH_GT_TOLERANCE_MIN,
    TRADING_FEE_RATE,
)
from deepcoin.matching.load_rules import load_matched_rules
from deepcoin.paths import MATCHING_FIRE_OUTCOMES, MATCHING_MATCHED_RULES

# Module-level memo used by load_gt_allocation_analysis(); None means the
# analysis has not been computed yet in this process.
_GT_ALLOC_ANALYSIS_CACHE: dict[str, Any] | None = None


def portfolio_totals(
    cash: float,
    qty: float,
    price: float,
) -> tuple[float, float, float]:
    """
    Compute total assets, coin holding value, and cash for a position.

    Args:
        cash: Cash balance (KRW).
        qty: Coin quantity held.
        price: Price used to value the holding (mark or fill price).

    Returns:
        (total_asset_krw, holding_value_krw, cash_krw). The cash value is
        passed through unchanged; total = cash + qty * price.
    """
    coin_value = qty * price
    return cash + coin_value, coin_value, cash


def optimal_weight_share(weight: float, weight_sum_remaining: float) -> float:
    """
    Share of the leg's remaining buy weight taken by this fill.

    Args:
        weight: Weight of the current entry point.
        weight_sum_remaining: Sum of the buy weights still remaining in the
            same leg.

    Returns:
        weight / weight_sum_remaining when the remaining sum is positive,
        otherwise 1.0.
    """
    has_remaining = weight_sum_remaining > 0
    return weight / weight_sum_remaining if has_remaining else 1.0


def compute_buy_amount_krw(
    cash: float,
    qty: float,
    price: float,
    weight: float,
    weight_sum_remaining: float,
    *,
    asset_pct_scale: float,
    min_order_krw: float = GT_MIN_ORDER_KRW,
    fee_rate: float = TRADING_FEE_RATE,
) -> float:
    """
    Compute the KRW amount to buy for one fill.

    target = total assets * (weight share * asset_pct_scale)
    amount = min(target, cash / (1 + fee_rate))

    Args:
        cash: Available cash (KRW).
        qty: Coin quantity held.
        price: Fill price; a non-positive price yields 0.0.
        weight: Weight of this entry point.
        weight_sum_remaining: Sum of remaining buy weights in the leg.
        asset_pct_scale: Tier scale (large/small leg or rule tier).
        min_order_krw: Minimum order size in KRW.
        fee_rate: Trading fee rate.

    Returns:
        Buy amount in KRW rounded to a whole number (0 means no order).
    """
    if price <= 0:
        return 0.0

    total_asset, _holding, available_cash = portfolio_totals(cash, qty, price)
    # Cash that can actually be spent once the fee is reserved; never negative.
    spendable = max(available_cash / (1.0 + fee_rate), 0.0)
    share = optimal_weight_share(weight, weight_sum_remaining)
    desired = total_asset * (share * asset_pct_scale)
    order_krw = min(desired, spendable)

    # A positive amount under the minimum is raised to the minimum, but only
    # when the spendable cash can cover it; otherwise it is left as computed.
    under_minimum = 0 < order_krw < min_order_krw
    if under_minimum and spendable >= min_order_krw:
        order_krw = min(min_order_krw, spendable)

    return round(max(order_krw, 0.0), 0)


def large_leg_ids_from_past_returns(
    leg_returns: dict[int, float],
    top_pct: float = GT_LARGE_LEG_TOP_PCT,
) -> set[int]:
    """
    Pick the top fraction of legs by realized return (causal tiering).

    Args:
        leg_returns: Mapping leg_id -> realized return (%) of legs that are
            already closed.
        top_pct: Fraction of legs to keep (0..1).

    Returns:
        Set of leg ids classified as large; empty when no returns are given,
        otherwise at least one id.
    """
    if not leg_returns:
        return set()
    # Stable descending sort: ties keep the mapping's insertion order.
    ids_by_return = sorted(leg_returns, key=leg_returns.__getitem__, reverse=True)
    # Adding 0.999999 before truncation rounds the count up while tolerating
    # tiny floating-point overshoot in len * top_pct.
    take = max(1, int(len(ids_by_return) * top_pct + 0.999999))
    return set(ids_by_return[:take])


def top_leg_ids_by_forward_return(
    trades: list[dict[str, Any]],
    top_pct: float = GT_LARGE_LEG_TOP_PCT,
) -> set[int]:
    """
    Pick the top fraction of legs ranked by their best sell forward return.

    Only trades whose "action" is "sell" are considered. A missing or falsy
    "forward_return_pct" counts as 0.0 and a missing "leg_id" counts as 0.

    Args:
        trades: GT trade dicts.
        top_pct: Fraction of legs to keep (0..1).

    Returns:
        Set of large-buy leg ids; empty when there are no sell trades,
        otherwise at least one id.
    """
    best_by_leg: dict[int, float] = {}
    for t in trades:
        if t.get("action") != "sell":
            continue
        lid = int(t.get("leg_id", 0))
        ret = float(t.get("forward_return_pct") or 0.0)
        prev = best_by_leg.get(lid)
        # Seed with the first observed return rather than 0.0 so that legs
        # whose sells are all negative keep their real maximum and are ranked
        # by it instead of collapsing into a tie at 0.0.
        best_by_leg[lid] = ret if prev is None else max(prev, ret)
    if not best_by_leg:
        return set()
    ranked = sorted(best_by_leg.items(), key=lambda x: x[1], reverse=True)
    # Adding 0.999999 before truncation rounds the count up while tolerating
    # tiny floating-point overshoot in len * top_pct.
    n = max(1, int(len(ranked) * top_pct + 0.999999))
    return {lid for lid, _ in ranked[:n]}


def leg_asset_pct_scale(leg_id: int, large_legs: set[int]) -> float:
    """
    Buy scale (relative to total assets) for a leg, chosen by its tier.

    Args:
        leg_id: Leg number.
        large_legs: Set of leg ids classified as large.

    Returns:
        GT_BUY_PCT_LARGE_LEG when leg_id is in large_legs, otherwise
        GT_BUY_PCT_SMALL_LEG, as a float.
    """
    tier_pct = GT_BUY_PCT_LARGE_LEG if leg_id in large_legs else GT_BUY_PCT_SMALL_LEG
    return float(tier_pct)


def _parse_dt(dt: str) -> datetime:
    """
    Parse an ISO-style timestamp using only its first 19 characters.

    "Z" is rewritten to "+00:00" first and the text is then cut to 19
    characters, so for full "YYYY-MM-DD?HH:MM:SS..." inputs any fractional
    seconds or UTC offset are discarded before parsing.
    """
    normalized = str(dt).replace("Z", "+00:00")
    head = normalized[:19]
    return datetime.fromisoformat(head)


def nearest_gt_leg_id(
    dt: str,
    gt_trades: list[dict[str, Any]],
    tolerance_min: int = MATCH_GT_TOLERANCE_MIN,
) -> int | None:
    """
    Find the leg_id of the GT trade closest in time, preferring buys.

    Args:
        dt: Timestamp of the rule fire.
        gt_trades: GT trade dicts (each read via "dt", "leg_id", "action").
        tolerance_min: Maximum allowed time gap in minutes (inclusive).

    Returns:
        leg_id of the nearest GT buy within tolerance; if no buy qualifies,
        the leg_id of the nearest GT trade of any action; None when nothing
        is within tolerance or gt_trades is empty.
    """
    if not gt_trades:
        return None

    fire_time = _parse_dt(dt)
    # Start both trackers just above the tolerance so any in-tolerance trade wins.
    out_of_range = float(tolerance_min) + 1.0
    nearest_buy_leg: int | None = None
    nearest_buy_gap = out_of_range
    nearest_leg: int | None = None
    nearest_gap = out_of_range

    for trade in gt_trades:
        try:
            trade_time = _parse_dt(trade["dt"])
        except ValueError:
            # Unparseable GT timestamps are skipped.
            continue
        gap_min = abs((fire_time - trade_time).total_seconds()) / 60.0
        if gap_min > tolerance_min:
            continue
        leg = int(trade.get("leg_id", 0))
        # Strict "<" keeps the earliest-listed trade on exact ties.
        if gap_min < nearest_gap:
            nearest_gap, nearest_leg = gap_min, leg
        if trade.get("action") == "buy" and gap_min < nearest_buy_gap:
            nearest_buy_gap, nearest_buy_leg = gap_min, leg

    if nearest_buy_leg is not None:
        return nearest_buy_leg
    return nearest_leg


# Module-level memo used by load_ev_wf_approved_rule_ids(); None means the
# approved rule set has not been computed yet in this process.
_APPROVED_RULES_CACHE: set[str] | None = None


def load_ev_wf_approved_rule_ids(
    matched_path: Path | None = None,
    outcomes_path: Path | None = None,
) -> set[str]:
    """
    Return the rule_ids whose go/no-go check passes.

    The go/no-go evaluation is fed the matched rules, a walk-forward summary,
    per-rule metrics under fee stress, and a holdout-only live order-cap
    simulation. The result is memoized in a module-level cache.

    NOTE(review): the cache is not keyed by matched_path/outcomes_path, so
    after the first computed result later calls return it regardless of the
    paths passed.

    Args:
        matched_path: matched_rules.json; defaults to MATCHING_MATCHED_RULES.
        outcomes_path: fire_outcomes.csv; defaults to MATCHING_FIRE_OUTCOMES.

    Returns:
        A fresh set of passing rule_ids. Falls back to every monitor rule id
        when there are no monitor rules, the outcomes file is missing, or no
        rule passes. The caller may mutate the returned set freely.
    """
    global _APPROVED_RULES_CACHE
    if _APPROVED_RULES_CACHE is not None:
        return set(_APPROVED_RULES_CACHE)

    from config import SIM_FEE_STRESS_MULT

    from deepcoin.matching.select_rules import _rule_metrics, _split_train_valid_holdout
    from deepcoin.matching.simulation import (
        _fee_adjust_ret,
        evaluate_go_no_go,
        simulate_live_order_cap,
        walk_forward_by_month,
        walk_forward_summary,
    )

    mp = matched_path or MATCHING_MATCHED_RULES
    op = outcomes_path or MATCHING_FIRE_OUTCOMES
    matched = load_matched_rules(mp)
    rules = matched.get("monitor_rules") or []
    monitor_ids = {r["rule_id"] for r in rules}
    if not rules or not op.is_file():
        # Nothing to evaluate; this early result is intentionally not cached.
        return monitor_ids

    import pandas as pd

    outcomes = pd.read_csv(op)
    outcomes["split"] = _split_train_valid_holdout(outcomes)
    wf_sum = walk_forward_summary(walk_forward_by_month(outcomes))

    # Per-rule metrics recomputed on fee-stressed forward returns.
    fee_stress: dict[str, Any] = {}
    for rid in outcomes["rule_id"].unique():
        sub = outcomes[outcomes["rule_id"] == rid]
        adj = _fee_adjust_ret(sub["forward_ret_pct"], SIM_FEE_STRESS_MULT)
        fee_stress[rid] = _rule_metrics(sub.assign(forward_ret_pct=adj))

    live_cap = simulate_live_order_cap(
        outcomes, rule_ids=monitor_ids, holdout_only=True
    )
    go = evaluate_go_no_go(matched, wf_sum, fee_stress, live_cap)
    passed = {c["rule_id"] for c in go.get("checks", []) if c.get("pass")}

    # When no rule passes, fall back to the full monitor set.
    approved = passed if passed else monitor_ids
    _APPROVED_RULES_CACHE = approved
    # Hand out a copy (as the cache-hit path does) so callers cannot mutate
    # the cached set.
    return set(approved)


def load_gt_allocation_analysis(
    gt_trades: list[dict[str, Any]] | None = None,
) -> dict[str, Any]:
    """
    Return the memoized GT amount_krw allocation analysis.

    Once a result (including an empty one) has been stored in the module
    cache, it is returned for every later call regardless of gt_trades.

    Args:
        gt_trades: GT trades. When None, trades are read from the resolved
            ground-truth file if it exists.

    Returns:
        The analyze_gt_buy_allocation result, or an empty dict when no trades
        are available. The cached object itself is returned.
    """
    global _GT_ALLOC_ANALYSIS_CACHE
    cached = _GT_ALLOC_ANALYSIS_CACHE
    if cached is not None:
        return cached

    from deepcoin.ground_truth.gt_allocation_analysis import analyze_gt_buy_allocation
    from deepcoin.paths import resolve_ground_truth_file

    source_trades = gt_trades
    if source_trades is None:
        gt_file = resolve_ground_truth_file()
        if gt_file.is_file():
            payload = json.loads(gt_file.read_text(encoding="utf-8"))
            source_trades = payload.get("trades") or []

    if not source_trades:
        _GT_ALLOC_ANALYSIS_CACHE = {}
        return _GT_ALLOC_ANALYSIS_CACHE

    ordered = sorted(source_trades, key=lambda trade: trade["dt"])
    has_amounts = any(float(trade.get("amount_krw") or 0) > 0 for trade in ordered)
    if not has_amounts:
        # No trade carries a positive amount_krw yet, so run the GT allocator
        # over the ordered trades before analysing them.
        from deepcoin.ground_truth.ground_truth import allocate_gt_order_amounts

        allocate_gt_order_amounts(ordered)

    _GT_ALLOC_ANALYSIS_CACHE = analyze_gt_buy_allocation(ordered)
    return _GT_ALLOC_ANALYSIS_CACHE


def gt_tier_scale_for_trade(
    trade: dict[str, Any],
    gt_trades: list[dict[str, Any]],
    large_legs: set[int],
    *,
    analysis: dict[str, Any] | None = None,
) -> float:
    """
    GT leg-tier allocation scale for a trade (analysis-recommended or config).

    Simulations use this GT tier policy instead of live_buy_asset_pct_scale.

    Args:
        trade: Trade dict; "leg_id" is used when present and not None,
            otherwise "dt" is matched against gt_trades.
        gt_trades: GT trades used for leg matching.
        large_legs: Set of leg ids classified as large.
        analysis: analyze_gt_buy_allocation result, if available.

    Returns:
        The scale from gt_tier_scale_from_analysis for the resolved leg, or
        GT_BUY_PCT_SMALL_LEG when no leg can be resolved.
    """
    from deepcoin.ground_truth.gt_allocation_analysis import gt_tier_scale_from_analysis

    leg = trade.get("leg_id")
    if leg is None:
        leg = nearest_gt_leg_id(str(trade["dt"]), gt_trades)
        if leg is None:
            return float(GT_BUY_PCT_SMALL_LEG)
    return gt_tier_scale_from_analysis(int(leg), large_legs, analysis)


def live_buy_asset_pct_scale(
    rule_id: str,
    dt: str,
    gt_trades: list[dict[str, Any]],
    *,
    approved_rules: set[str],
    large_legs: set[int],
) -> float:
    """
    Buy tier for live trading; simulations use gt_tier_scale_for_trade.

    Args:
        rule_id: Rule id that fired.
        dt: Fill timestamp.
        gt_trades: GT trades used for leg matching.
        approved_rules: Rule ids that passed approval.
        large_legs: Set of leg ids classified as large.

    Returns:
        LIVE_BUY_PCT_LARGE when the rule is approved and the nearest GT leg is
        a large leg; LIVE_BUY_PCT_SMALL in every other case.
    """
    from config import LIVE_BUY_PCT_LARGE, LIVE_BUY_PCT_SMALL

    if rule_id in approved_rules:
        matched_leg = nearest_gt_leg_id(dt, gt_trades)
        if matched_leg is not None and matched_leg in large_legs:
            return float(LIVE_BUY_PCT_LARGE)
    return float(LIVE_BUY_PCT_SMALL)


def enrich_sim_trades_with_gt_weights(
    trades: list[dict[str, Any]],
    gt_trades: list[dict[str, Any]],
    *,
    causal_legs: bool = False,
) -> list[dict[str, Any]]:
    """
    Assign a leg_id and buy/sell weights to rule fires.

    Rows are sorted by "dt" and grouped into legs: a run of consecutive buys
    followed by the sells that occur before the next buy. The trade dicts are
    updated in place; the returned list holds the same dict objects.

    With causal_legs=True, leg ids are sequential numbers assigned per
    buy..sell span without consulting GT trades (causal).

    Args:
        trades: Dicts with dt, action (or side), price, rule_id.
        gt_trades: GT trades used for leg matching when causal_legs=False.
        causal_legs: Use sequential leg ids instead of GT matching.

    Returns:
        The trade dicts, sorted by dt, with leg_id and weight filled in.
    """
    from deepcoin.ground_truth.gt_model import leg_entry_weights, leg_exit_weights

    rows = sorted(trades, key=lambda x: x["dt"])
    pos = 0  # cursor into rows
    seq_leg = 0  # sequential leg counter, used only when causal_legs is True
    while pos < len(rows):
        # "action" is preferred; "side" is the fallback key.
        action = rows[pos].get("action", rows[pos].get("side", ""))
        if action != "buy":
            # A non-buy row that is not part of a buy-led leg is tagged alone.
            if causal_legs:
                rows[pos]["leg_id"] = seq_leg
            elif "leg_id" not in rows[pos]:
                # No GT match (None) falls back to leg 0.
                rows[pos]["leg_id"] = nearest_gt_leg_id(rows[pos]["dt"], gt_trades) or 0
            rows[pos]["weight"] = float(rows[pos].get("weight", 1.0))
            pos += 1
            continue
        # Extend buy_end over the run of consecutive buys starting at pos.
        buy_end = pos
        while buy_end < len(rows):
            a = rows[buy_end].get("action", rows[buy_end].get("side", ""))
            if a != "buy":
                break
            buy_end += 1
        buy_slice = rows[pos:buy_end]
        # Collect the sells that follow, stopping at the next buy.
        # NOTE(review): rows in this span that are neither "buy" nor "sell"
        # are stepped over; when at least one sell was collected the cursor
        # jumps past them, so they receive no leg_id/weight — confirm intended.
        sell_slice: list[dict[str, Any]] = []
        sell_end = buy_end
        while sell_end < len(rows):
            a = rows[sell_end].get("action", rows[sell_end].get("side", ""))
            if a == "buy":
                break
            if a == "sell":
                sell_slice.append(rows[sell_end])
            sell_end += 1

        if causal_legs:
            leg_id = seq_leg
        else:
            # The leg is matched from the first buy's timestamp; no match -> 0.
            leg_id = nearest_gt_leg_id(buy_slice[0]["dt"], gt_trades) or 0
        prices = [float(t["price"]) for t in buy_slice]
        buy_weights = leg_entry_weights(prices)
        for t, w in zip(buy_slice, buy_weights):
            t["leg_id"] = leg_id
            t["weight"] = round(w, 4)
            # Normalize rows that only carry "side" so they also have "action".
            if "action" not in t and "side" in t:
                t["action"] = t["side"]

        if sell_slice:
            sw = leg_exit_weights(len(sell_slice))
            for t, w in zip(sell_slice, sw):
                t["leg_id"] = leg_id
                t["weight"] = round(w, 4)
                if "action" not in t and "side" in t:
                    t["action"] = t["side"]
        # The sequential leg number advances only once a leg has had a sell.
        if causal_legs and sell_slice:
            seq_leg += 1
        # Without sells, resume right after the buys so any trailing non-buy
        # rows are handled by the single-row branch above.
        pos = sell_end if sell_slice else buy_end
    return rows


def attach_gt_model_amounts(
    trades: list[dict[str, Any]],
    *,
    gt_trades: list[dict[str, Any]] | None = None,
    approved_rules: set[str] | None = None,
    large_legs: set[int] | None = None,
    initial_cash: float = GT_INITIAL_CASH_KRW,
    fee_rate: float = TRADING_FEE_RATE,
) -> list[dict[str, Any]]:
    """
    Fill amount_krw using GT-model weights and the shared allocation engine.

    For simulation/matching only: legs are assigned sequentially
    (causal_legs=True) and the allocator is run with causal_tier=True. For
    the GT reference allocation use ground_truth.allocate_gt_order_amounts.

    Args:
        trades: Output of enrich_sim_trades_with_gt_weights or raw fires.
        gt_trades: GT trades. When None they are loaded from file.
        approved_rules: Accepted for interface compatibility; not used here.
        large_legs: Accepted for interface compatibility; not used here (the
            allocator is called with large_legs=None).
        initial_cash: Starting cash.
        fee_rate: Trading fee rate.

    Returns:
        Trade dicts with amount_krw, weight, and leg_id filled in.
    """
    from deepcoin.ground_truth.gt_allocation import allocate_order_amounts_chronological

    if gt_trades is None:
        gt_trades, _large, _approved = load_sizing_context_from_gt()

    # list(trades) copies only the list; the trade dicts are shared.
    sized = enrich_sim_trades_with_gt_weights(list(trades), gt_trades, causal_legs=True)

    allocation_options = {
        "initial_cash": initial_cash,
        "fee_rate": fee_rate,
        "large_legs": None,
        "asset_pct_scale_fn": None,
        "causal_tier": True,
    }
    allocate_order_amounts_chronological(sized, **allocation_options)
    return sized


def plan_open_position_buy(
    open_buys: list[dict[str, Any]],
    candidate: dict[str, Any],
    cash: float,
    qty: float,
    gt_trades: list[dict[str, Any]] | None = None,
    *,
    large_legs: set[int],
    analysis: dict[str, Any] | None = None,
    fee_rate: float = TRADING_FEE_RATE,
) -> float:
    """
    KRW amount for the next buy inside an open position.

    Uses the GT tier scale and the available-cash cap; there is no separate
    per-order ceiling.

    Args:
        open_buys: Buys already filled in the current position.
        candidate: The buy being considered: dt, price, rule_id, optional
            leg_id.
        cash: Available cash.
        qty: Coin quantity held.
        gt_trades: GT trades for leg matching; loaded from file when None.
        large_legs: Set of leg ids classified as large.
        analysis: GT allocation analysis; loaded when None.
        fee_rate: Trading fee rate.

    Returns:
        Planned buy amount in KRW.
    """
    from deepcoin.ground_truth.gt_model import leg_entry_weights

    if gt_trades is None:
        gt_trades, _, _ = load_sizing_context_from_gt()
    if analysis is None:
        analysis = load_gt_allocation_analysis(gt_trades)

    # Entry weights are computed over the filled buys plus the candidate,
    # which takes the last slot.
    entry_prices = [float(buy["price"]) for buy in open_buys]
    entry_prices.append(float(candidate["price"]))
    entry_weights = leg_entry_weights(entry_prices)
    slot = len(open_buys)
    this_weight = entry_weights[slot]
    remaining_weight = sum(entry_weights[slot:])

    # Work on a copy so the caller's candidate dict is not modified.
    tagged = dict(candidate)
    if "leg_id" not in tagged:
        tagged["leg_id"] = nearest_gt_leg_id(str(candidate["dt"]), gt_trades)
    tier_scale = gt_tier_scale_for_trade(
        tagged, gt_trades, large_legs, analysis=analysis
    )

    return compute_buy_amount_krw(
        cash,
        qty,
        float(candidate["price"]),
        this_weight,
        remaining_weight,
        asset_pct_scale=tier_scale,
        fee_rate=fee_rate,
    )


def attach_dynamic_buy_amounts(
    trades: list[dict[str, Any]],
    *,
    gt_trades: list[dict[str, Any]] | None = None,
    approved_rules: set[str] | None = None,
    large_legs: set[int] | None = None,
    initial_cash: float = GT_INITIAL_CASH_KRW,
    default_weight: float = 1.0,
    fee_rate: float = TRADING_FEE_RATE,
) -> list[dict[str, Any]]:
    """
    Fill amount_krw on simulated fire trades (GT model, available-cash cap).

    Alias of attach_gt_model_amounts: every argument except default_weight
    is forwarded unchanged; default_weight is accepted but not used.
    """
    forwarded = {
        "gt_trades": gt_trades,
        "approved_rules": approved_rules,
        "large_legs": large_legs,
        "initial_cash": initial_cash,
        "fee_rate": fee_rate,
    }
    return attach_gt_model_amounts(trades, **forwarded)


def load_sizing_context_from_gt(
    gt_path: Path | None = None,
) -> tuple[list[dict[str, Any]], set[int], set[str]]:
    """
    Load GT trades and derive the large-leg set from the GT JSON file.

    Args:
        gt_path: ground_truth_trades.json; resolved via
            resolve_ground_truth_file() when not given.

    Returns:
        (gt_trades, large_legs, approved_rules). gt_trades is empty when the
        file does not exist. approved_rules is always an empty set here; the
        approved rule ids are not computed by this function.
    """
    from deepcoin.paths import resolve_ground_truth_file

    gt_file = gt_path or resolve_ground_truth_file()
    gt_trades: list[dict[str, Any]]
    if gt_file.is_file():
        payload = json.loads(gt_file.read_text(encoding="utf-8"))
        gt_trades = payload.get("trades") or []
    else:
        gt_trades = []
    return gt_trades, top_leg_ids_by_forward_return(gt_trades), set()