Files
Bithumb/deepcoin/analysis/general_analysis_snapshot.py
dsyoon 2cb67c42b3 GT MTF 프로필·캘리브레이션과 04 매칭/시뮬/실거래 파이프라인을 추가한다.
3분~일봉 GT 타점 분석(03c), leg 체결 순서 수정, 총자산 90% 검증 루프,
walk-forward Go/No-Go 시뮬, monitor·live_trader 및 reference 문서를 포함한다.

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-05-31 11:27:50 +09:00

169 lines
5.5 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
general_analysis ground truth 타점 MTF 스냅샷 생성.
"""
from __future__ import annotations
import json
import sys
import time
from pathlib import Path
from typing import Any
import pandas as pd
from deepcoin.analysis.general_analysis_align import general_analysis_mtf_scores
from deepcoin.analysis.general_analysis_config import (
DEFAULT_OUTPUT_CSV,
DEFAULT_TRADES_FILE,
GENERAL_ANALYSIS_INTERVALS,
)
from deepcoin.analysis.general_analysis_core import interval_tf_prefix
from deepcoin.analysis.general_analysis_pipeline import general_analysis_enrich_bars, general_analysis_snapshot_at_bar
from deepcoin.ground_truth.ground_truth import load_ground_truth
def _prefixed_snap(snap: dict[str, Any], interval: int) -> dict[str, Any]:
    """
    Return a new dict whose keys carry the prefix of ``interval``.

    Args:
        snap: snapshot mapping for a single interval.
        interval: interval passed to ``interval_tf_prefix`` to get the prefix.
    Returns:
        Mapping with every key ``k`` renamed to ``"<prefix>_<k>"``; the values
        are passed through unchanged and ``snap`` itself is not modified.
    """
    prefix = interval_tf_prefix(interval)
    renamed: dict[str, Any] = {}
    for key, value in snap.items():
        renamed[f"{prefix}_{key}"] = value
    return renamed
def build_trade_mtf_snapshots(
    frames: dict[int, pd.DataFrame],
    trades: list[dict[str, Any]],
) -> pd.DataFrame:
    """
    Build one wide general_analysis snapshot row per trade.

    Phase A enriches the bars of every interval in
    ``GENERAL_ANALYSIS_INTERVALS`` once via ``general_analysis_enrich_bars``;
    intervals whose frame is missing or empty are skipped. Phase B walks the
    trades in ascending ``dt`` order, takes a snapshot of each enriched
    interval at the trade timestamp, prefixes its keys with the interval
    prefix and adds the values returned by ``general_analysis_mtf_scores``.
    Progress for both phases is printed to stdout and flushed immediately.

    Args:
        frames: interval -> OHLCV.
        trades: ground_truth trades. Each entry must provide ``dt``,
            ``action`` and ``price``; ``weight`` (default 1.0), ``leg_id``
            (default 0) and ``memo`` (default "") are optional.
    Returns:
        wide DataFrame (1 row per trade). ``trade_idx`` is the position of
        the trade in the dt-sorted order of ``trades``.
    Raises:
        KeyError: if a trade lacks ``dt``, ``action`` or ``price``.
    """
    n_trades = len(trades)
    # Step totals are derived from the configured interval list rather than a
    # hard-coded 8, so the progress labels stay correct if the list changes.
    n_intervals = len(GENERAL_ANALYSIS_INTERVALS)
    enriched: dict[int, pd.DataFrame] = {}

    t0 = time.time()
    print(
        f"[03b] Phase A: {n_intervals}TF enrich (1분봉 제외, 전 기법) — {n_intervals}개 간격",
        flush=True,
    )
    for step, iv in enumerate(GENERAL_ANALYSIS_INTERVALS, start=1):
        label = interval_tf_prefix(iv)
        raw = frames.get(iv)
        if raw is None or raw.empty:
            print(f" [{step}/{n_intervals}] {label} SKIP (데이터 없음)", flush=True)
            continue
        print(
            f" [{step}/{n_intervals}] {label} enrich 시작 ({len(raw):,}봉)...",
            flush=True,
        )
        t_iv = time.time()
        enriched[iv] = general_analysis_enrich_bars(raw, iv, full_context=True)
        print(
            f" [{step}/{n_intervals}] {label} 완료 — {len(enriched[iv].columns)}열, "
            f"{time.time() - t_iv:.0f}",
            flush=True,
        )
    print(f"[03b] Phase A 완료 (누적 {time.time() - t0:.0f}초)", flush=True)

    print(f"[03b] Phase B: GT 타점 스냅샷 {n_trades}", flush=True)
    rows: list[dict[str, Any]] = []
    t_b = time.time()
    for i, t in enumerate(sorted(trades, key=lambda x: x["dt"])):
        ts = pd.Timestamp(t["dt"])
        row: dict[str, Any] = {
            "trade_idx": i,
            "dt": t["dt"],
            "action": t["action"],
            "price": t["price"],
            "weight": t.get("weight", 1.0),
            "leg_id": t.get("leg_id", 0),
            "memo": t.get("memo", ""),
        }
        # `enriched` was filled in GENERAL_ANALYSIS_INTERVALS order and only
        # holds intervals that had data, so iterating it directly visits the
        # same intervals in the same order as re-filtering the config list.
        flat: dict[str, Any] = {}
        for iv, ef in enriched.items():
            snap = general_analysis_snapshot_at_bar(ef, ts, iv)
            flat.update(_prefixed_snap(snap, iv))
        row.update(flat)
        # Scores are computed from the prefixed snapshot values only.
        row.update(general_analysis_mtf_scores(flat))
        rows.append(row)

        done = i + 1
        # Report on the first trade, every 25th trade and the last trade.
        if done == 1 or done % 25 == 0 or done == n_trades:
            elapsed = time.time() - t_b
            rate = done / elapsed if elapsed > 0 else 0
            eta = (n_trades - done) / rate if rate > 0 else 0
            print(
                f" 타점 {done}/{n_trades} "
                f"({elapsed:.0f}초 경과, ETA 약 {eta:.0f}초)",
                flush=True,
            )
    print(f"[03b] Phase B 완료 ({time.time() - t_b:.0f}초)", flush=True)
    return pd.DataFrame(rows)
def append_missing_gt_snapshots(
    frames: dict[int, pd.DataFrame],
    trades_path: Path | str = DEFAULT_TRADES_FILE,
    output_csv: Path | str = DEFAULT_OUTPUT_CSV,
) -> int:
    """
    Append MTF snapshots only for GT trades that the CSV does not hold yet.

    A trade counts as already present when its ``(dt, action)`` pair,
    compared as strings, matches a row of the existing CSV. Nothing is
    written when the CSV does not exist, the ground-truth loader returns no
    data, or no trade is missing.

    Args:
        frames: interval -> OHLCV.
        trades_path: ground_truth JSON.
        output_csv: 03b CSV, rewritten in place when rows are added.
    Returns:
        Number of rows added.
    """
    out = Path(output_csv)
    if not out.is_file():
        return 0
    data = load_ground_truth(Path(trades_path))
    if not data:
        return 0
    trades = data.get("trades") or []

    try:
        existing = pd.read_csv(out)
    except pd.errors.EmptyDataError:
        # A CSV written from an empty DataFrame has no header to parse;
        # treat it as "no rows yet" instead of crashing.
        existing = pd.DataFrame()

    if existing.empty:
        # No rows means nothing can match; this also avoids a KeyError when
        # the file has no "dt"/"action" columns at all.
        have: set[tuple[str, str]] = set()
    else:
        have = set(zip(existing["dt"].astype(str), existing["action"].astype(str)))

    missing = [
        t
        for t in trades
        if (str(t["dt"]), str(t["action"])) not in have
    ]
    if not missing:
        return 0

    print(f"[03b] 누락 GT 타점 {len(missing)}건 스냅샷 추가")
    add_df = build_trade_mtf_snapshots(frames, missing)

    # build_trade_mtf_snapshots numbers its rows from 0, which would collide
    # with indices already stored in the CSV; continue after the current max.
    if (
        not existing.empty
        and "trade_idx" in existing.columns
        and "trade_idx" in add_df.columns
    ):
        last_idx = pd.to_numeric(existing["trade_idx"], errors="coerce").max()
        if pd.notna(last_idx):
            add_df["trade_idx"] = add_df["trade_idx"] + int(last_idx) + 1

    if existing.empty:
        merged = add_df
    else:
        merged = pd.concat([existing, add_df], ignore_index=True)
    merged.to_csv(out, index=False, encoding="utf-8-sig")
    print(f"[03b] CSV 갱신: {out} ({len(merged)}행)")
    return len(missing)
def export_trade_snapshots(
    frames: dict[int, pd.DataFrame],
    trades_path: Path | str = DEFAULT_TRADES_FILE,
    output_csv: Path | str = DEFAULT_OUTPUT_CSV,
) -> Path:
    """
    Save the MTF snapshot of every ground-truth trade as a CSV file.

    The parent directory of ``output_csv`` is created when needed and the
    file is written as UTF-8 with BOM, without the DataFrame index.

    Args:
        frames: interval -> OHLCV.
        trades_path: ground_truth file handed to ``load_ground_truth``.
        output_csv: destination CSV path.
    Returns:
        Path the CSV was written to.
    Raises:
        FileNotFoundError: if ``load_ground_truth`` returns nothing.
    """
    ground_truth = load_ground_truth(Path(trades_path))
    if not ground_truth:
        raise FileNotFoundError(f"정답 파일 없음: {trades_path}")

    trade_list = ground_truth.get("trades") or []
    interval_count = len(GENERAL_ANALYSIS_INTERVALS)
    print(f"타점 {len(trade_list)}× {interval_count} TF general_analysis")

    snapshots = build_trade_mtf_snapshots(frames, trade_list)

    target = Path(output_csv)
    target.parent.mkdir(parents=True, exist_ok=True)
    snapshots.to_csv(target, index=False, encoding="utf-8-sig")

    n_rows, n_cols = snapshots.shape
    print(f"저장: {target} ({n_rows}× {n_cols}열)")
    return target