"""
GT 타점 MTF 프로필 반복 보강 — 스냅샷 recall·총자산 비율 90% 목표.
"""

from __future__ import annotations

import json
from pathlib import Path
from typing import Any

import numpy as np
import pandas as pd

from config import (
    GENERAL_ANALYSIS_INTERVALS,
    MATCH_PROFILE_MIN_SAMPLES,
    MATCH_PROFILE_MIN_SEPARATION,
)
from deepcoin.analysis.general_analysis_core import interval_tf_prefix
from deepcoin.matching.config import ANALYSIS_TRADES_CSV
from deepcoin.matching.gt_asset_calibration import (
    evaluate_gt_snapshot_recall,
    portfolio_asset_ratio,
)
from deepcoin.matching.gt_mtf_profile import (
    analyze_gt_mtf_profile,
    discover_profile_columns,
)
from deepcoin.matching.profile_rules import (
    _condition_from_series,
    _feature_separation,
    build_rule_candidates,
)
from deepcoin.matching.rule_eval import eval_rule_mask
from deepcoin.paths import (
    ANALYSIS_GT_CALIBRATION_JSON,
    ANALYSIS_GT_MTF_PROFILE_JSON,
    resolve_ground_truth_file,
)
from deepcoin.ground_truth.ground_truth import load_ground_truth


def _condition_or_group(
    series: pd.Series,
    side: str,
    quantile_lo: float = 0.15,
    quantile_hi: float = 0.85,
) -> dict[str, Any] | None:
    """
    한 컬럼 GT 분포에서 between 조건.

    Args:
        series: side GT 값.
        side: buy | sell.
        quantile_lo: 하한 분위.
        quantile_hi: 상한 분위.

    Returns:
        조건 dict.
    """
    col_name = series.name
    if series.dtype == object or not pd.api.types.is_numeric_dtype(series):
        mode = series.dropna().astype(str).mode()
        if mode.empty:
            return None
        return {"col": col_name, "op": "eq", "value": str(mode.iloc[0])}
    s = pd.to_numeric(series, errors="coerce").dropna()
    if len(s) < MATCH_PROFILE_MIN_SAMPLES:
        return None
    lo = float(s.quantile(quantile_lo))
    hi = float(s.quantile(quantile_hi))
    if lo >= hi:
        return None
    return {"col": col_name, "op": "between", "lo": lo, "hi": hi}


def build_or_tf_rules(
    buy: pd.DataFrame,
    sell: pd.DataFrame,
    ranked_cols: list[str],
    *,
    per_tf: int = 4,
) -> list[dict[str, Any]]:
    """
    Build one OR-composite rule per side and timeframe.

    For every interval in ``GENERAL_ANALYSIS_INTERVALS`` the columns carrying
    that interval's prefix are ordered by ``_feature_separation`` and the top
    ``per_tf`` are turned into 20%-80% band conditions. A rule is emitted when
    at least two conditions were produced; the ``m240`` prefix never yields a
    rule.

    Args:
        buy: Buy-side GT rows.
        sell: Sell-side GT rows.
        ranked_cols: Candidate columns (ranked by separation by the caller).
        per_tf: Maximum number of OR conditions per timeframe.

    Returns:
        List of rule dicts with ``kind == "or_tf"`` and ``logic == "or"``.
    """
    or_rules: list[dict[str, Any]] = []
    frames_by_side = {"buy": buy, "sell": sell}

    for side, subset in frames_by_side.items():
        for interval in GENERAL_ANALYSIS_INTERVALS:
            prefix = interval_tf_prefix(interval)
            col_prefix = f"{prefix}_"

            candidates = [
                name
                for name in ranked_cols
                if name.startswith(col_prefix) and name in subset.columns
            ]
            # Strongest buy/sell separation first; stable for equal scores.
            candidates.sort(
                key=lambda name: _feature_separation(buy, sell, name),
                reverse=True,
            )

            conditions: list[dict[str, Any]] = []
            for name in candidates[:per_tf]:
                cond = _condition_or_group(subset[name], side, 0.20, 0.80)
                if cond:
                    conditions.append(cond)

            if len(conditions) < 2 or prefix in ("m240",):
                continue
            or_rules.append(
                {
                    "rule_id": f"{side}_or_{prefix}",
                    "side": side,
                    "kind": "or_tf",
                    "logic": "or",
                    "conditions": conditions,
                }
            )
    return or_rules


def build_unmatched_atomic_rules(
    trades_df: pd.DataFrame,
    rules: list[dict[str, Any]],
    side: str,
    *,
    max_new: int = 12,
) -> list[dict[str, Any]]:
    """
    Derive atomic calibration rules from GT rows that no existing rule matches.

    Each GT row of ``side`` is evaluated on its own against every existing rule
    of that side. For the rows that match none, numeric profile columns are
    scored by the standardized mean difference between those unmatched rows and
    the opposite side's rows. The best-scoring columns not already used by a
    rule of this side each become a single-condition rule.

    Args:
        trades_df: Trades CSV (03b output) loaded as a frame; must contain an
            ``action`` column.
        rules: Existing rules.
        side: buy | sell.
        max_new: Maximum number of rules to return. At most ``max_new * 3``
            top-scoring columns are considered.

    Returns:
        New rule dicts with ``kind == "calibration_atomic"``; empty when every
        GT row of ``side`` is already matched.
    """
    gt = trades_df[trades_df["action"] == side]
    side_rules = [r for r in rules if r.get("side") == side]

    # Rows are evaluated one at a time as single-row frames.
    unmatched_idx = [
        idx
        for idx, row in gt.iterrows()
        if not any(
            bool(eval_rule_mask(pd.DataFrame([row]), r).iloc[0])
            for r in side_rules
        )
    ]
    if not unmatched_idx:
        return []

    unmatched = gt.loc[unmatched_idx]
    opposite = "sell" if side == "buy" else "buy"
    other = trades_df[trades_df["action"] == opposite]

    scores: list[tuple[float, str]] = []
    for col in discover_profile_columns(trades_df):
        if col not in unmatched.columns:
            continue
        if not pd.api.types.is_numeric_dtype(unmatched[col]):
            continue
        u = pd.to_numeric(unmatched[col], errors="coerce").dropna()
        o = pd.to_numeric(other[col], errors="coerce").dropna()
        if len(u) < 3 or len(o) < 5:
            continue
        # Standardized mean difference; the epsilon guards a zero pooled std.
        pooled = np.sqrt((u.var() + o.var()) / 2) + 1e-9
        sep = float(abs(float(u.mean() - o.mean())) / pooled)
        if not np.isfinite(sep):
            # inf/NaN inputs give a score that can neither be ranked nor
            # written as valid JSON, so the column is skipped.
            continue
        scores.append((sep, col))

    scores.sort(reverse=True)

    existing_cols = {
        c["col"]
        for r in rules
        if r.get("side") == side
        for c in r.get("conditions", [])
    }
    new_rules: list[dict[str, Any]] = []
    for sep, col in scores[: max_new * 3]:
        if col in existing_cols:
            continue
        # Calibration accepts half the regular separation threshold.
        if sep < MATCH_PROFILE_MIN_SEPARATION * 0.5:
            continue
        cond = _condition_from_series(unmatched[col], side)
        if cond is None:
            # Fall back to a wide 10%-90% band over the unmatched rows.
            cond = _condition_or_group(unmatched[col], side, 0.10, 0.90)
        if cond is None:
            continue
        new_rules.append(
            {
                "rule_id": f"{side}_cal_{col}",
                "side": side,
                "kind": "calibration_atomic",
                "logic": "and",
                "conditions": [cond],
                "profile_col": col,
                "calibration_sep": round(sep, 4),
            }
        )
        existing_cols.add(col)
        if len(new_rules) >= max_new:
            break
    return new_rules


def _feature_separation_df(
    buy: pd.DataFrame,
    sell: pd.DataFrame,
    col: str,
) -> float:
    """DataFrame 컬럼 분리도."""
    if col not in buy.columns:
        return 0.0
    a = pd.to_numeric(buy[col], errors="coerce").dropna()
    b = pd.to_numeric(sell[col], errors="coerce").dropna()
    if len(a) < 5 or len(b) < 5:
        return 0.0
    pooled = np.sqrt((a.var() + b.var()) / 2)
    if pooled < 1e-9:
        return abs(float(a.mean() - b.mean()))
    return abs(float(a.mean() - b.mean())) / pooled


def _covered_leg_ids(
    df: pd.DataFrame,
    gt_df: pd.DataFrame,
    rules: list[dict[str, Any]],
    leg_ids: Any,
) -> set[int]:
    """
    Return the leg ids whose buy and sell GT trades are all matched by a rule.

    A trade is matched when ``df`` has a row with the same ``dt`` and
    ``action`` and at least one rule of that side evaluates true on the first
    such row. A leg with no trades of a side passes that side vacuously.

    Args:
        df: Trades snapshot frame with ``dt`` and ``action`` columns.
        gt_df: GT trades as a frame with ``leg_id``, ``action`` and ``dt``.
        rules: Rules to evaluate.
        leg_ids: Iterable of leg ids to check.

    Returns:
        Set of covered leg ids as ints; empty when ``gt_df`` is empty.
    """
    if gt_df.empty:
        # No GT trades: nothing to cover, and no ``leg_id`` column to index.
        return set()

    rules_by_side = {
        side: [r for r in rules if r.get("side") == side]
        for side in ("buy", "sell")
    }

    def _side_matched(leg: pd.DataFrame, side: str) -> bool:
        for _, trade in leg[leg["action"] == side].iterrows():
            snap = df[(df["dt"] == trade["dt"]) & (df["action"] == side)]
            if snap.empty:
                return False
            frame = pd.DataFrame([snap.iloc[0]])
            if not any(
                bool(eval_rule_mask(frame, r).iloc[0])
                for r in rules_by_side[side]
            ):
                return False
        return True

    covered: set[int] = set()
    for lid in leg_ids:
        leg = gt_df[gt_df["leg_id"] == lid]
        if _side_matched(leg, "buy") and _side_matched(leg, "sell"):
            covered.add(int(lid))
    return covered


def run_profile_calibration_loop(
    trades_csv: Path | None = None,
    *,
    target_recall: float = 0.90,
    target_asset_ratio: float = 0.90,
    max_iterations: int = 5,
) -> dict[str, Any]:
    """
    Iteratively extend the rule set against the GT trades and validate it.

    Steps:
      1. Load the trades CSV, run ``analyze_gt_mtf_profile`` and write the
         result to ``ANALYSIS_GT_MTF_PROFILE_JSON``.
      2. Start from ``build_rule_candidates`` plus per-timeframe OR rules.
      3. Up to ``max_iterations`` times: measure snapshot recall and the
         asset ratio of fully covered legs, remember the rule set with the
         best asset ratio, stop when all targets are met, otherwise add
         rules built from unmatched GT rows.
      4. De-duplicate the best rule set by ``rule_id``, shrink it with
         ``_greedy_recall_cover`` and re-evaluate.
      5. Write the report to ``ANALYSIS_GT_CALIBRATION_JSON``.

    Args:
        trades_csv: Trades CSV (03b output); defaults to ``ANALYSIS_TRADES_CSV``.
        target_recall: Snapshot recall target for both buy and sell.
        target_asset_ratio: Target ratio of the covered-leg subset to GT
            total assets.
        max_iterations: Maximum number of calibration iterations.

    Returns:
        Calibration report dict with keys ``target_recall``,
        ``target_asset_ratio``, ``iterations``, ``final`` and
        ``calibrated_rules``. The ``final`` metrics describe the reduced rule
        set; ``final["rule_count"]`` is the size of the accumulated rule list
        before reduction.
    """
    path = trades_csv or ANALYSIS_TRADES_CSV
    df = pd.read_csv(path)
    buy = df[df["action"] == "buy"]
    sell = df[df["action"] == "sell"]

    analysis = analyze_gt_mtf_profile(df)
    ANALYSIS_GT_MTF_PROFILE_JSON.parent.mkdir(parents=True, exist_ok=True)
    ANALYSIS_GT_MTF_PROFILE_JSON.write_text(
        json.dumps(analysis, ensure_ascii=False, indent=2),
        encoding="utf-8",
    )

    # First entry per column wins, mirroring a first-match linear search.
    top_entries: dict[Any, dict[str, Any]] = {}
    for entry in analysis["global_top_separation"]:
        top_entries.setdefault(entry["col"], entry)

    def _rank_key(col: str) -> float:
        entry = top_entries.get(col)
        if entry is not None:
            return entry["separation"]
        # Columns outside the analysis' top list are scored locally.
        return _feature_separation_df(buy, sell, col)

    numeric_ranked = sorted(
        [f["col"] for f in analysis["features"] if f["dtype"] == "numeric"],
        key=_rank_key,
        reverse=True,
    )

    base = build_rule_candidates(path)
    rules: list[dict[str, Any]] = list(base.get("rules", []))
    for r in rules:
        if "logic" not in r:
            r["logic"] = "and"

    rules.extend(build_or_tf_rules(buy, sell, numeric_ranked[:80]))

    history: list[dict[str, Any]] = []
    best_rules: list[dict[str, Any]] = list(rules)
    best_asset_ratio = -1.0
    gt_data = load_ground_truth(resolve_ground_truth_file()) or {}
    gt_trades = gt_data.get("trades") or []
    mark = (gt_data.get("summary") or {}).get("mark_price")

    # Loop-invariant GT views. The loop checks legs that have a buy or sell
    # trade; the final evaluation checks every leg id present in the GT frame.
    gt_df = pd.DataFrame(gt_trades)
    loop_leg_ids = {
        int(t["leg_id"]) for t in gt_trades if t["action"] in ("buy", "sell")
    }
    final_leg_ids = [] if gt_df.empty else gt_df["leg_id"].unique()

    for it in range(max_iterations):
        recall = evaluate_gt_snapshot_recall(df, rules)
        buy_rec = recall["buy"]["recall"]
        sell_rec = recall["sell"]["recall"]

        included_legs = _covered_leg_ids(df, gt_df, rules, loop_leg_ids)
        asset = portfolio_asset_ratio(gt_trades, included_legs, mark)
        row_hist = {
            "iteration": it,
            "rule_count": len(rules),
            "buy_recall": buy_rec,
            "sell_recall": sell_rec,
            **asset,
        }
        history.append(row_hist)
        print(
            f"[cal {it}] rules={len(rules)} "
            f"buy_rec={buy_rec:.2%} sell_rec={sell_rec:.2%} "
            f"asset_ratio={asset['asset_ratio']:.2%} legs={asset['legs_covered']}/{asset['legs_total']}"
        )
        if asset["asset_ratio"] > best_asset_ratio:
            best_asset_ratio = asset["asset_ratio"]
            best_rules = list(rules)

        if (
            buy_rec >= target_recall
            and sell_rec >= target_recall
            and asset["asset_ratio"] >= target_asset_ratio
        ):
            break

        added = 0
        for side in ("buy", "sell"):
            if recall[side]["recall"] >= target_recall:
                continue
            new_rules = build_unmatched_atomic_rules(df, rules, side, max_new=15)
            rules.extend(new_rules)
            added += len(new_rules)
        if added == 0:
            # Nothing new from the under-target sides: widen the search.
            rules.extend(build_or_tf_rules(buy, sell, numeric_ranked[:120]))
            for side in ("buy", "sell"):
                rules.extend(
                    build_unmatched_atomic_rules(df, rules, side, max_new=20)
                )
            if len(rules) > 200:
                break

    # Keep the first occurrence of each rule_id from the best rule set.
    deduped: list[dict[str, Any]] = []
    seen_rid: set[str] = set()
    for r in best_rules:
        rid = r.get("rule_id", "")
        if rid in seen_rid:
            continue
        seen_rid.add(rid)
        deduped.append(r)

    final_rules = _greedy_recall_cover(df, deduped, target_recall=target_recall)
    final_recall = evaluate_gt_snapshot_recall(df, final_rules)
    final_legs = _covered_leg_ids(df, gt_df, final_rules, final_leg_ids)
    final_asset = portfolio_asset_ratio(gt_trades, final_legs, mark)

    out = {
        "target_recall": target_recall,
        "target_asset_ratio": target_asset_ratio,
        "iterations": history,
        "final": {
            "rule_count": len(rules),
            "snapshot_recall": final_recall,
            "portfolio": final_asset,
            "targets_met": (
                final_recall["buy"]["recall"] >= target_recall
                and final_recall["sell"]["recall"] >= target_recall
                and final_asset["asset_ratio"] >= target_asset_ratio
            ),
            "rule_count_after_greedy": len(final_rules),
        },
        "calibrated_rules": final_rules,
    }
    ANALYSIS_GT_CALIBRATION_JSON.parent.mkdir(parents=True, exist_ok=True)
    ANALYSIS_GT_CALIBRATION_JSON.write_text(
        json.dumps(out, ensure_ascii=False, indent=2),
        encoding="utf-8",
    )
    return out


def _greedy_recall_cover(
    trades_df: pd.DataFrame,
    rules: list[dict[str, Any]],
    *,
    target_recall: float = 0.90,
    max_per_side: int = 40,
) -> list[dict[str, Any]]:
    """
    측면별 recall 목표까지 greedy로 규칙 축소.

    Args:
        trades_df: 03b CSV.
        rules: 후보 규칙 전체.
        target_recall: 목표 recall.

    Returns:
        축소된 규칙 + 기존 compound/mtf_cross 유지.
    """
    keep_kinds = {
        "compound_tight",
        "compound",
        "contrast",
        "mtf_cross",
        "or_tf",
    }
    kept = [r for r in rules if r.get("kind") in keep_kinds]
    pool = [r for r in rules if r not in kept]

    for side in ("buy", "sell"):
        gt = trades_df[trades_df["action"] == side]
        if gt.empty:
            continue
        uncovered = set(gt.index)
        side_pool = [r for r in pool if r.get("side") == side]
        picked: list[dict[str, Any]] = []
        while uncovered and len(picked) < max_per_side:
            best_rule = None
            best_new = 0
            for rule in side_pool:
                if rule in picked:
                    continue
                new_hit = 0
                for idx in list(uncovered):
                    row = gt.loc[idx]
                    if bool(eval_rule_mask(pd.DataFrame([row]), rule).iloc[0]):
                        new_hit += 1
                if new_hit > best_new:
                    best_new = new_hit
                    best_rule = rule
            if best_rule is None or best_new == 0:
                break
            picked.append(best_rule)
            still = set()
            for idx in uncovered:
                row = gt.loc[idx]
                if not any(
                    bool(eval_rule_mask(pd.DataFrame([row]), r).iloc[0])
                    for r in picked + [x for x in kept if x.get("side") == side]
                ):
                    still.add(idx)
            uncovered = still
            rec = 1.0 - len(uncovered) / len(gt)
            if rec >= target_recall:
                break
        kept.extend(picked)
    return kept