""" general_analysis ground truth 타점 MTF 스냅샷 생성. """ from __future__ import annotations import json import sys import time from pathlib import Path from typing import Any import pandas as pd from deepcoin.analysis.general_analysis_align import general_analysis_mtf_scores from deepcoin.analysis.general_analysis_config import ( DEFAULT_OUTPUT_CSV, DEFAULT_TRADES_FILE, GENERAL_ANALYSIS_INTERVALS, ) from deepcoin.analysis.general_analysis_core import interval_tf_prefix from deepcoin.analysis.general_analysis_pipeline import general_analysis_enrich_bars, general_analysis_snapshot_at_bar from deepcoin.ground_truth.ground_truth import load_ground_truth def _prefixed_snap(snap: dict[str, Any], interval: int) -> dict[str, Any]: p = interval_tf_prefix(interval) return {f"{p}_{k}": v for k, v in snap.items()} def build_trade_mtf_snapshots( frames: dict[int, pd.DataFrame], trades: list[dict[str, Any]], ) -> pd.DataFrame: """ 모든 타점에 대해 8개 간격 general_analysis 스냅샷. Args: frames: interval → OHLCV. trades: ground_truth trades. Returns: wide DataFrame (1 row per trade). """ n_trades = len(trades) enriched: dict[int, pd.DataFrame] = {} t0 = time.time() print(f"[03b] Phase A: 8TF enrich (1분봉 제외, 전 기법) — {len(GENERAL_ANALYSIS_INTERVALS)}개 간격") sys.stdout.flush() for step, iv in enumerate(GENERAL_ANALYSIS_INTERVALS, start=1): raw = frames.get(iv) if raw is None or raw.empty: print(f" [{step}/8] {interval_tf_prefix(iv)} SKIP (데이터 없음)") sys.stdout.flush() continue label = interval_tf_prefix(iv) print(f" [{step}/8] {label} enrich 시작 ({len(raw):,}봉)...") sys.stdout.flush() t_iv = time.time() enriched[iv] = general_analysis_enrich_bars(raw, iv, full_context=True) print(f" [{step}/8] {label} 완료 — {len(enriched[iv].columns)}열, {time.time() - t_iv:.0f}초") sys.stdout.flush() print(f"[03b] Phase A 완료 (누적 {time.time() - t0:.0f}초)") sys.stdout.flush() print(f"[03b] Phase B: GT 타점 스냅샷 {n_trades}건") sys.stdout.flush() rows: list[dict[str, Any]] = [] t_b = time.time() for i, t in enumerate(sorted(trades, key=lambda x: x["dt"])): ts = pd.Timestamp(t["dt"]) row: dict[str, Any] = { "trade_idx": i, "dt": t["dt"], "action": t["action"], "price": t["price"], "weight": t.get("weight", 1.0), "leg_id": t.get("leg_id", 0), "memo": t.get("memo", ""), } flat: dict[str, Any] = {} for iv in GENERAL_ANALYSIS_INTERVALS: ef = enriched.get(iv) if ef is None: continue snap = general_analysis_snapshot_at_bar(ef, ts, iv) flat.update(_prefixed_snap(snap, iv)) row.update(flat) row.update(general_analysis_mtf_scores(flat)) rows.append(row) done = i + 1 if done == 1 or done % 25 == 0 or done == n_trades: elapsed = time.time() - t_b rate = done / elapsed if elapsed > 0 else 0 eta = (n_trades - done) / rate if rate > 0 else 0 print( f" 타점 {done}/{n_trades} " f"({elapsed:.0f}초 경과, ETA 약 {eta:.0f}초)" ) sys.stdout.flush() print(f"[03b] Phase B 완료 ({time.time() - t_b:.0f}초)") sys.stdout.flush() return pd.DataFrame(rows) def append_missing_gt_snapshots( frames: dict[int, pd.DataFrame], trades_path: Path | str = DEFAULT_TRADES_FILE, output_csv: Path | str = DEFAULT_OUTPUT_CSV, ) -> int: """ CSV에 없는 GT 타점만 MTF 스냅샷 추가. Args: frames: interval → OHLCV. trades_path: ground_truth JSON. output_csv: 03b CSV. Returns: 추가된 행 수. """ out = Path(output_csv) if not out.is_file(): return 0 data = load_ground_truth(Path(trades_path)) if not data: return 0 trades = data.get("trades") or [] existing = pd.read_csv(out) have = set(zip(existing["dt"].astype(str), existing["action"].astype(str))) missing = [ t for t in trades if (str(t["dt"]), str(t["action"])) not in have ] if not missing: return 0 print(f"[03b] 누락 GT 타점 {len(missing)}건 스냅샷 추가") add_df = build_trade_mtf_snapshots(frames, missing) merged = pd.concat([existing, add_df], ignore_index=True) merged.to_csv(out, index=False, encoding="utf-8-sig") print(f"[03b] CSV 갱신: {out} ({len(merged)}행)") return len(missing) def export_trade_snapshots( frames: dict[int, pd.DataFrame], trades_path: Path | str = DEFAULT_TRADES_FILE, output_csv: Path | str = DEFAULT_OUTPUT_CSV, ) -> Path: """ CSV로 타점 MTF 스냅샷 저장. Returns: 저장 경로. """ data = load_ground_truth(Path(trades_path)) if not data: raise FileNotFoundError(f"정답 파일 없음: {trades_path}") trades = data.get("trades") or [] print(f"타점 {len(trades)}건 × {len(GENERAL_ANALYSIS_INTERVALS)} TF general_analysis") df = build_trade_mtf_snapshots(frames, trades) out = Path(output_csv) out.parent.mkdir(parents=True, exist_ok=True) df.to_csv(out, index=False, encoding="utf-8-sig") print(f"저장: {out} ({len(df)}행 × {len(df.columns)}열)") return out