Files
Bithumb/deepcoin/analysis/general_analysis_snapshot.py
xavis d7848df6f7 refactor: GT·시뮬·운영 3축 정리 및 hybrid 실거래 정합
Phase C/dry-run·미사용 모듈·재생성 HTML을 제거하고, 운영 체결을
sim_causal_hybrid와 동일한 hybrid 로직으로 통합한다.

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-06-03 23:50:28 +09:00

131 lines
4.3 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
general_analysis ground truth 타점 MTF 스냅샷 생성.
"""
from __future__ import annotations
import json
import sys
import time
from pathlib import Path
from typing import Any
import pandas as pd
from deepcoin.analysis.general_analysis_align import general_analysis_mtf_scores
from deepcoin.analysis.general_analysis_config import (
DEFAULT_OUTPUT_CSV,
DEFAULT_TRADES_FILE,
GENERAL_ANALYSIS_INTERVALS,
)
from deepcoin.analysis.general_analysis_core import interval_tf_prefix
from deepcoin.analysis.general_analysis_pipeline import general_analysis_enrich_bars, general_analysis_snapshot_at_bar
from deepcoin.ground_truth.ground_truth import load_ground_truth
def _prefixed_snap(snap: dict[str, Any], interval: int) -> dict[str, Any]:
    """
    Return a copy of ``snap`` whose keys carry the interval's timeframe prefix.

    Args:
        snap: snapshot mapping for a single interval.
        interval: interval identifier handed to ``interval_tf_prefix``.

    Returns:
        New dict with every key rewritten as ``"<prefix>_<key>"``; values are
        passed through untouched.
    """
    prefix = interval_tf_prefix(interval)
    prefixed: dict[str, Any] = {}
    for key, value in snap.items():
        prefixed[f"{prefix}_{key}"] = value
    return prefixed
def build_trade_mtf_snapshots(
    frames: dict[int, pd.DataFrame],
    trades: list[dict[str, Any]],
) -> pd.DataFrame:
    """
    Build one wide general_analysis snapshot row per ground-truth trade.

    Phase A enriches the frame of every interval listed in
    ``GENERAL_ANALYSIS_INTERVALS``; intervals whose frame is missing or empty
    are skipped. Phase B walks the trades in ascending ``dt`` order, takes a
    snapshot of each enriched interval at the trade timestamp, prefixes the
    snapshot keys with the interval's timeframe prefix, and finally merges in
    whatever ``general_analysis_mtf_scores`` returns for the combined
    snapshot. Progress is printed to stdout and flushed after every message.

    Args:
        frames: interval -> OHLCV DataFrame.
        trades: ground-truth trades. Each entry must provide ``dt``,
            ``action`` and ``price``; ``weight`` (default 1.0), ``leg_id``
            (default 0) and ``memo`` (default "") are optional.

    Returns:
        Wide DataFrame with one row per trade; ``trade_idx`` is the trade's
        position in the ``dt``-sorted order.
    """
    n_trades = len(trades)
    # Progress denominator comes from the config rather than a hard-coded 8,
    # so the "[step/total]" labels stay correct if the interval list changes.
    n_intervals = len(GENERAL_ANALYSIS_INTERVALS)

    # ---- Phase A: enrich every available interval once ----
    enriched: dict[int, pd.DataFrame] = {}
    t0 = time.time()
    print(
        f"[03b] MTF enrich (주·월봉 포함) — {n_intervals}개 간격",
        flush=True,
    )
    for step, iv in enumerate(GENERAL_ANALYSIS_INTERVALS, start=1):
        label = interval_tf_prefix(iv)
        raw = frames.get(iv)
        if raw is None or raw.empty:
            print(f" [{step}/{n_intervals}] {label} SKIP (데이터 없음)", flush=True)
            continue
        print(
            f" [{step}/{n_intervals}] {label} enrich 시작 ({len(raw):,}봉)...",
            flush=True,
        )
        t_iv = time.time()
        enriched[iv] = general_analysis_enrich_bars(raw, iv, full_context=True)
        print(
            f" [{step}/{n_intervals}] {label} 완료 — "
            f"{len(enriched[iv].columns)}열, {time.time() - t_iv:.0f}",
            flush=True,
        )
    print(f"[03b] Phase A 완료 (누적 {time.time() - t0:.0f}초)", flush=True)

    # ---- Phase B: one snapshot row per trade ----
    print(f"[03b] Phase B: GT 타점 스냅샷 {n_trades}", flush=True)
    rows: list[dict[str, Any]] = []
    t_b = time.time()
    for i, t in enumerate(sorted(trades, key=lambda x: x["dt"])):
        ts = pd.Timestamp(t["dt"])
        row: dict[str, Any] = {
            "trade_idx": i,
            "dt": t["dt"],
            "action": t["action"],
            "price": t["price"],
            "weight": t.get("weight", 1.0),
            "leg_id": t.get("leg_id", 0),
            "memo": t.get("memo", ""),
        }
        # Only enriched intervals contribute; the dict preserves the
        # GENERAL_ANALYSIS_INTERVALS order in which they were inserted.
        flat: dict[str, Any] = {}
        for iv, ef in enriched.items():
            snap = general_analysis_snapshot_at_bar(ef, ts, iv)
            flat.update(_prefixed_snap(snap, iv))
        row.update(flat)
        # Scores are computed from the per-interval snapshot only, not from
        # the trade metadata fields.
        row.update(general_analysis_mtf_scores(flat))
        rows.append(row)

        # Report on the first trade, every 25th trade, and the last trade.
        done = i + 1
        if done == 1 or done % 25 == 0 or done == n_trades:
            elapsed = time.time() - t_b
            rate = done / elapsed if elapsed > 0 else 0
            eta = (n_trades - done) / rate if rate > 0 else 0
            print(
                f" 타점 {done}/{n_trades} "
                f"({elapsed:.0f}초 경과, ETA 약 {eta:.0f}초)",
                flush=True,
            )
    print(f"[03b] Phase B 완료 ({time.time() - t_b:.0f}초)", flush=True)
    return pd.DataFrame(rows)
def export_trade_snapshots(
    frames: dict[int, pd.DataFrame],
    trades_path: Path | str = DEFAULT_TRADES_FILE,
    output_csv: Path | str = DEFAULT_OUTPUT_CSV,
) -> Path:
    """
    Save the per-trade MTF snapshots as a CSV file.

    Args:
        frames: interval -> DataFrame mapping forwarded to
            ``build_trade_mtf_snapshots``.
        trades_path: ground-truth file passed to ``load_ground_truth``.
        output_csv: destination CSV; missing parent directories are created.

    Returns:
        Path of the written CSV.

    Raises:
        FileNotFoundError: if ``load_ground_truth`` returns a falsy result.
    """
    ground_truth = load_ground_truth(Path(trades_path))
    if not ground_truth:
        raise FileNotFoundError(f"정답 파일 없음: {trades_path}")

    # A missing or empty "trades" entry is treated as an empty trade list.
    trade_list = ground_truth.get("trades") or []
    interval_count = len(GENERAL_ANALYSIS_INTERVALS)
    print(f"타점 {len(trade_list)}× {interval_count} TF general_analysis")

    snapshot_df = build_trade_mtf_snapshots(frames, trade_list)

    destination = Path(output_csv)
    destination.parent.mkdir(parents=True, exist_ok=True)
    snapshot_df.to_csv(destination, index=False, encoding="utf-8-sig")

    row_count = len(snapshot_df)
    column_count = len(snapshot_df.columns)
    print(f"저장: {destination} ({row_count}× {column_count}열)")
    return destination