refactor: Git에서 데이터 제거, 설정·코드만 유지

파이프라인 산출물(data/, docs/)을 Git 추적에서 제외하고
히스토리를 단일 커밋으로 재구성해 저장소 용량을 경량화한다.

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
2026-06-12 10:01:43 +09:00
commit 741c949470
92 changed files with 12230 additions and 0 deletions

9
scripts/00_download.py Normal file
View File

@@ -0,0 +1,9 @@
#!/usr/bin/env python3
"""사전: 빗썸 캔들 수집 (기본=전체 인터벌 증분, --full=풀 다운)."""
import runpy
from pathlib import Path
if __name__ == "__main__":
    # Thin alias: execute the sibling collector script as if it had been
    # launched directly, so both entry points share one implementation.
    collector = Path(__file__).resolve().with_name("00_download_candles.py")
    runpy.run_path(str(collector), run_name="__main__")

View File

@@ -0,0 +1,159 @@
#!/usr/bin/env python3
"""사전: 빗썸 캔들 수집 — 기본: 전체 인터벌 증분 갱신, --full: 전체 인터벌 풀 다운."""
from __future__ import annotations
import argparse
import logging
import sys
from datetime import datetime
from pathlib import Path
ROOT = Path(__file__).resolve().parents[1]
SRC = ROOT / "src"
if str(SRC) not in sys.path:
sys.path.insert(0, str(SRC))
from dataclasses import replace
from deepcoin.config import load_settings
from deepcoin.data.candle_store import CandleStore
from deepcoin.data.downloader import CandleDownloader
from deepcoin.data.intervals import INTERVAL_1MIN, estimate_download_requests, interval_label
def _configure_logging(verbose: bool) -> None:
    """Configure root logging: DEBUG when ``verbose`` is true, otherwise INFO."""
    logging.basicConfig(
        level=logging.DEBUG if verbose else logging.INFO,
        format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
    )
def _parse_intervals(raw: str) -> list[int]:
    """Parse a comma-separated interval list such as ``"1,3,60"``.

    Blank items are ignored.

    Raises:
        ValueError: If an item is not an integer, or no interval remains.
    """
    values = [int(part) for part in raw.split(",") if part.strip()]
    if not values:
        raise ValueError("at least one interval is required")
    return values


def main() -> int:
    """CLI entry point: collect candles for the configured intervals.

    By default every interval is updated incrementally; ``--full`` downloads
    the whole ``days`` window again. A per-interval request estimate is logged
    before the download and a per-interval result line is printed afterwards.

    Returns:
        Process exit code (0 once the download and summary have finished).
        An invalid ``--intervals`` value ends the process through
        ``parser.error`` (exit code 2).
    """
    parser = argparse.ArgumentParser(
        description="빗썸 캔들 데이터 수집 (DOWNLOAD_INTERVALS 전체, 1분봉 포함)",
    )
    parser.add_argument(
        "--full",
        action="store_true",
        help="전체 인터벌을 DOWNLOAD_DAYS 구간만큼 역방향 풀 다운 (최초 1회·재구축)",
    )
    parser.add_argument(
        "--days",
        type=int,
        default=None,
        help="풀 다운(--full) 또는 DB 비어 있을 때 목표 일수 (기본: DOWNLOAD_DAYS)",
    )
    parser.add_argument(
        "--intervals",
        type=str,
        default=None,
        help="(고급) 쉼표 구분 인터벌만 수집. 기본: .env DOWNLOAD_INTERVALS 전체",
    )
    parser.add_argument(
        "--include-1min",
        action="store_true",
        help="1분봉(1)을 기존 DOWNLOAD_INTERVALS에 추가하여 수집",
    )
    parser.add_argument("-v", "--verbose", action="store_true", help="디버그 로그")
    args = parser.parse_args()

    # Validate --intervals before doing any work, so a typo ends with a usage
    # error instead of an int() traceback or a run that downloads nothing.
    cli_intervals: list[int] | None = None
    if args.intervals:
        try:
            cli_intervals = _parse_intervals(args.intervals)
        except ValueError as exc:
            parser.error(f"--intervals: {exc}")

    _configure_logging(args.verbose)
    settings = load_settings()
    if cli_intervals is not None:
        settings = replace(settings, download_intervals=cli_intervals)
    elif args.include_1min and INTERVAL_1MIN not in settings.download_intervals:
        settings = replace(
            settings,
            download_intervals=sorted({*settings.download_intervals, INTERVAL_1MIN}),
        )

    # NOTE: `--days 0` falls back to the configured default.
    days = args.days or settings.download_days
    mode_label = "full" if args.full else "incremental"
    log = logging.getLogger(__name__)
    log.info(
        "대상=%s DB=%s mode=%s days=%s intervals=%s",
        settings.market,
        settings.db_path,
        mode_label,
        days,
        settings.download_intervals,
    )

    store = CandleStore(settings.db_path)
    try:
        # One estimate per interval, matching what will actually be fetched:
        # the full window for --full or an empty DB, otherwise only the gap
        # since the newest stored candle.
        for interval in settings.download_intervals:
            if args.full:
                est = estimate_download_requests(interval, days, batch_size=settings.candle_count)
                log.info(
                    "예상 API 요청: %s%s회 (풀 다운, sleep %.2fs)",
                    interval_label(interval),
                    est,
                    settings.request_sleep_sec,
                )
            else:
                _, _, db_max = store.get_range(settings.symbol, interval)
                if db_max is None:
                    est = estimate_download_requests(interval, days, batch_size=settings.candle_count)
                    log.info(
                        "예상 API 요청: %s%s회 (DB 없음 → 풀 다운)",
                        interval_label(interval),
                        est,
                    )
                else:
                    # NOTE(review): assumes db_max is a naive datetime on the
                    # same clock as datetime.now() — confirm against CandleStore.
                    gap_days = max(1, (datetime.now() - db_max).days + 1)
                    est = estimate_download_requests(interval, gap_days, batch_size=settings.candle_count)
                    log.info(
                        "예상 API 요청: %s%s회 (증분, DB=%s, 갭≈%s일)",
                        interval_label(interval),
                        est,
                        db_max.strftime("%Y-%m-%d %H:%M:%S"),
                        gap_days,
                    )

        downloader = CandleDownloader(settings)
        results = downloader.download_all(store, days=days, full=args.full)

        print(f"\n=== 수집 완료 ({mode_label}) ===")
        for result in results:
            count, min_dt, max_dt = store.get_range(settings.symbol, result.interval_min)
            min_s = min_dt.strftime("%Y-%m-%d %H:%M:%S") if min_dt else "-"
            max_s = max_dt.strftime("%Y-%m-%d %H:%M:%S") if max_dt else "-"
            if result.mode == "uptodate":
                flag = "UPTODATE"
            elif result.reached_target:
                flag = "OK"
            else:
                flag = "PARTIAL"
            label = interval_label(result.interval_min)
            print(
                f"[{flag}] {label} ({result.interval_min}) mode={result.mode} | "
                f"requests={result.requests} upsert={result.saved_rows} "
                f"db_rows={count} range={min_s} ~ {max_s}"
            )
    finally:
        # Release the store even if the download fails midway.
        store.close()
    return 0


if __name__ == "__main__":
    raise SystemExit(main())

167
scripts/0_ground_truth.py Normal file
View File

@@ -0,0 +1,167 @@
#!/usr/bin/env python3
"""0단계: Ground Truth — 사후 최적 매수·매도 타점 (v1/v2/v3)."""
from __future__ import annotations
import argparse
import logging
import sys
from dataclasses import replace
from pathlib import Path
from typing import Any
ROOT = Path(__file__).resolve().parents[1]
SRC = ROOT / "src"
if str(SRC) not in sys.path:
sys.path.insert(0, str(SRC))
from deepcoin.config import Settings, load_settings
from deepcoin.data.intervals import interval_label
from deepcoin.ground_truth.chart import render_ground_truth_chart
from deepcoin.ground_truth.ground_truth import GtParams, build_ground_truth, save_ground_truth
# Human-readable description of each Ground Truth tier, keyed by tier id;
# shown in the header line of the per-tier summary.
TIER_DESCRIPTIONS = {
    "v1": "스윙만 (최소 매수·매도)",
    "v2": "스윙 + 눌림목",
    "v3": "스윙 + 눌림목 + 돌파 + 다이버전스",
}
def _configure_logging(verbose: bool) -> None:
    """Set up root logging at DEBUG when ``verbose`` is true, otherwise INFO."""
    options = {
        "format": "%(asctime)s [%(levelname)s] %(message)s",
        "datefmt": "%Y-%m-%d %H:%M:%S",
    }
    options["level"] = logging.DEBUG if verbose else logging.INFO
    logging.basicConfig(**options)
def _base_params(settings: Settings, args: argparse.Namespace) -> GtParams:
    """Build the GT parameters shared by every tier.

    CLI options take precedence over the matching ``settings`` values; all
    other fields are copied from ``settings``. ``chart_tier`` is not set here.

    Args:
        settings: Loaded settings providing the ``gt_*`` defaults.
        args: Parsed CLI namespace with ``interval``, ``days``, ``zigzag`` and
            ``min_leg`` (each ``None`` when the option was not given).

    Returns:
        The common ``GtParams`` instance.
    """
    # An explicit 0 for a percentage threshold must be honoured, so only an
    # omitted option (None) falls back to the configured value; `or` would
    # silently replace 0.0 with the default.
    zigzag_pct = settings.gt_zigzag_reversal_pct if args.zigzag is None else args.zigzag
    min_leg_pct = settings.gt_min_leg_pct if args.min_leg is None else args.min_leg
    return GtParams(
        # 0 is not a usable interval or day count, so `or` is kept for these.
        interval_min=args.interval or settings.gt_interval_min,
        lookback_days=args.days or settings.gt_lookback_days,
        zigzag_reversal_pct=zigzag_pct,
        min_leg_pct=min_leg_pct,
        pullback_min_pct=settings.gt_pullback_min_pct,
        pullback_local_order=settings.gt_pullback_local_order,
        breakout_buffer_pct=settings.gt_breakout_buffer_pct,
        breakout_consolidation_bars=settings.gt_breakout_consolidation_bars,
        breakout_min_rally_pct=settings.gt_breakout_min_rally_pct,
        div_local_order=settings.gt_div_local_order,
        div_min_bars_between=settings.gt_div_min_bars_between,
        div_min_rsi_diff=settings.gt_div_min_rsi_diff,
        div_min_future_move_pct=settings.gt_div_min_future_move_pct,
    )
def _tier_targets(settings: Settings, tier_arg: str) -> list[tuple[str, Path, Path]]:
    """Return ``(tier, json_path, chart_path)`` for the requested tier(s).

    ``tier_arg`` is either a single tier id or ``"all"`` for every tier in
    v1, v2, v3 order.
    """
    outputs: dict[str, tuple[Path, Path]] = {
        "v1": (settings.ground_truth_v1_file, settings.ground_truth_chart_v1_file),
        "v2": (settings.ground_truth_v2_file, settings.ground_truth_chart_v2_file),
        "v3": (settings.ground_truth_file, settings.ground_truth_chart_v3_file),
    }
    wanted = list(outputs) if tier_arg == "all" else [tier_arg]
    return [(name, outputs[name][0], outputs[name][1]) for name in wanted]
def _print_tier_summary(
    tier: str,
    result: dict[str, Any],
    json_path: Path,
    chart_path: Path | None,
) -> None:
    """Print a console digest of one tier's Ground Truth result.

    Args:
        tier: Tier id, also used to look up ``TIER_DESCRIPTIONS``.
        result: GT result with ``summary``, ``meta`` and ``pnl`` sections.
        json_path: Path printed as the JSON location.
        chart_path: Path printed as the chart location; skipped when falsy.
    """
    summary, meta, pnl = result["summary"], result["meta"], result["pnl"]

    print(f"\n=== Ground Truth {tier.upper()} ({TIER_DESCRIPTIONS[tier]}) ===")
    print(f"대상: {meta['symbol']} ({meta['interval_label']})")
    print(f"GT·수익 기간: {meta['data_from']} ~ {meta['data_to']} ({meta['bar_count']}봉)")
    print(f"피벗: {meta['pivot_count']}개 → 레그: {summary['leg_count']}")

    # Buy/sell counts; the per-type breakdown keys are optional and default to 0.
    signal_line = (
        f"매수 타점: {summary['buy_count']}"
        f"(눌림 {summary.get('pullback_buy_count', 0)} / 돌파 {summary.get('breakout_buy_count', 0)} "
        f"/ 다이버전스 {summary.get('divergence_buy_count', 0)}) "
        f"/ 매도: {summary['sell_count']}개 (다이버전스 {summary.get('divergence_sell_count', 0)})"
    )
    print(signal_line)
    print(f"레그 수익률 — 평균: {summary['avg_leg_pct']}%, 최대: {summary['max_leg_pct']}%")

    # The date span is only shown when the PnL section carries a start date.
    period = (
        f" ({pnl['period_from'][:10]} ~ {pnl['period_to'][:10]})"
        if pnl.get("period_from")
        else ""
    )
    print(f"누적 수익{period}: {pnl['final_cash_krw']:,.0f}원 ({pnl['total_return_pct']:+.2f}%)")
    print(f"JSON: {json_path}")
    if not chart_path:
        return
    print(f"차트: {chart_path}")
def main() -> int:
    """CLI entry point for stage 0: build, save and optionally chart each GT tier.

    Returns:
        Process exit code (0 after all requested tiers are processed).
    """
    parser = argparse.ArgumentParser(description="0단계: Ground Truth 타점 생성")
    # The four numeric overrides share the same shape: optional, default None.
    for flag, kind, text in (
        ("--interval", int, "GT 인터벌(분)"),
        ("--days", int, "GT·타점 기간(일). 기본 3447(2017~)"),
        ("--zigzag", float, "ZigZag 되돌림 %%"),
        ("--min-leg", float, "최소 레그 수익률 %%"),
    ):
        parser.add_argument(flag, type=kind, default=None, help=text)
    parser.add_argument("--no-chart", action="store_true", help="HTML 차트 생략")
    parser.add_argument(
        "--tier",
        choices=("v1", "v2", "v3", "all"),
        default="all",
        help="생성할 GT 버전 (v1=스윙만, v2=+눌림, v3=전체, all=3종)",
    )
    parser.add_argument("-v", "--verbose", action="store_true")
    args = parser.parse_args()

    _configure_logging(args.verbose)
    settings = load_settings()
    base = _base_params(settings, args)
    tiers = _tier_targets(settings, args.tier)

    logging.info(
        "GT 생성: %s %s, %s일, ZigZag=%s%%, min_leg=%s%%, 초기=%s원, tier=%s",
        settings.symbol,
        interval_label(base.interval_min),
        base.lookback_days,
        base.zigzag_reversal_pct,
        base.min_leg_pct,
        f"{settings.gt_initial_cash_krw:,.0f}",
        args.tier,
    )
    print("\n=== Ground Truth 완료 (0단계) ===")
    print(f"차트·타점 표시: 최근 {settings.download_days}일 (2017~)")

    for tier, json_path, chart_path in tiers:
        # Tiers share every parameter except chart_tier.
        params = replace(base, chart_tier=tier)
        result = build_ground_truth(
            db_path=settings.db_path,
            symbol=settings.symbol,
            coin_name=settings.coin_name,
            params=params,
            initial_cash_krw=settings.gt_initial_cash_krw,
            fee_rate=settings.gt_trading_fee_rate,
        )
        save_ground_truth(result, json_path)

        if args.no_chart:
            rendered: Path | None = None
        else:
            rendered = render_ground_truth_chart(
                db_path=settings.db_path,
                symbol=settings.symbol,
                gt_result=result,
                output_path=chart_path,
                chart_lookback_days=settings.download_days,
            )
        _print_tier_summary(tier, result, json_path, rendered)
    return 0


if __name__ == "__main__":
    raise SystemExit(main())

View File

@@ -0,0 +1,119 @@
#!/usr/bin/env python3
"""0단계: 선물 GT 타점 차트 (현물 GT → 롱·숏 4색 마커)."""
from __future__ import annotations
import argparse
import json
import logging
import sys
from pathlib import Path
from typing import Any
ROOT = Path(__file__).resolve().parents[1]
SRC = ROOT / "src"
if str(SRC) not in sys.path:
sys.path.insert(0, str(SRC))
from deepcoin.config import Settings, load_settings
from deepcoin.ground_truth.futures_chart import render_futures_ground_truth_chart
# Human-readable description of each Ground Truth tier, keyed by tier id;
# shown in the header line of the per-tier chart summary.
TIER_DESCRIPTIONS = {
    "v1": "스윙만 (최소 매수·매도)",
    "v2": "스윙 + 눌림목",
    "v3": "스윙 + 눌림목 + 돌파 + 다이버전스",
}
def _configure_logging(verbose: bool) -> None:
    """Initialise root logging; ``verbose`` selects DEBUG instead of INFO."""
    if verbose:
        threshold = logging.DEBUG
    else:
        threshold = logging.INFO
    logging.basicConfig(
        format="%(asctime)s [%(levelname)s] %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
        level=threshold,
    )
def _tier_targets(settings: Settings, tier_arg: str) -> list[tuple[str, Path, Path]]:
    """Return ``(tier, futures_json_path, futures_chart_path)`` for the requested tier(s).

    ``tier_arg`` is either a single tier id or ``"all"`` for every tier in
    v1, v2, v3 order.
    """
    json_by_tier = {
        "v1": settings.ground_truth_futures_v1_file,
        "v2": settings.ground_truth_futures_v2_file,
        "v3": settings.ground_truth_futures_file,
    }
    chart_by_tier = {
        "v1": settings.ground_truth_futures_chart_v1_file,
        "v2": settings.ground_truth_futures_chart_v2_file,
        "v3": settings.ground_truth_futures_chart_v3_file,
    }
    wanted = list(json_by_tier) if tier_arg == "all" else [tier_arg]
    return [(name, json_by_tier[name], chart_by_tier[name]) for name in wanted]
def _load_gt(json_path: Path) -> dict[str, Any]:
    """Read the UTF-8 futures GT JSON file at ``json_path`` and return the parsed content."""
    return json.loads(json_path.read_text(encoding="utf-8"))
def _print_summary(tier: str, gt_result: dict[str, Any], chart_path: Path) -> None:
    """Print a console digest of one tier's futures chart.

    Args:
        tier: Tier id, also used to look up ``TIER_DESCRIPTIONS``.
        gt_result: Loaded GT JSON with ``meta`` and ``summary`` sections.
        chart_path: Path printed as the chart location.
    """
    meta, summary = gt_result["meta"], gt_result["summary"]

    print(f"\n=== 선물 GT 차트 {tier.upper()} ({TIER_DESCRIPTIONS[tier]}) ===")
    print(f"대상: {meta['symbol']} ({meta['interval_label']})")
    print(f"GT 기간: {meta['data_from']} ~ {meta['data_to']}")
    marker_line = (
        f"선물 GT 타점: 매수 {summary['buy_count']} / 매도 {summary['sell_count']} "
        f"→ 선물 상방·하방 각 {summary['buy_count']}/{summary['sell_count']} 마커"
    )
    print(marker_line)
    print(f"차트: {chart_path}")
def main() -> int:
    """CLI entry point: render the futures Ground Truth chart for each requested tier.

    Returns:
        0 when every requested tier was rendered, 1 as soon as a tier's GT
        JSON file is missing (later tiers are then not processed).
    """
    cli = argparse.ArgumentParser(
        description="선물 GT JSON 기반 Ground Truth 차트 (롱·숏 4색)"
    )
    cli.add_argument(
        "--tier",
        choices=("v1", "v2", "v3", "all"),
        default="all",
        help="대상 GT 버전",
    )
    cli.add_argument("-v", "--verbose", action="store_true")
    args = cli.parse_args()

    _configure_logging(args.verbose)
    settings = load_settings()
    tiers = _tier_targets(settings, args.tier)

    print("\n=== 선물 Ground Truth 차트 생성 ===")
    print("현물 GT 타점 → L↑상방매수 L↓상방매도 S↓하방매수 S↑하방매도")

    for tier, json_path, chart_path in tiers:
        if json_path.exists():
            gt_result = _load_gt(json_path)
            render_futures_ground_truth_chart(
                db_path=settings.db_path,
                symbol=settings.symbol,
                gt_result=gt_result,
                output_path=chart_path,
                chart_lookback_days=settings.download_days,
            )
            _print_summary(tier, gt_result, chart_path)
            continue
        # NOTE(review): json_path is the *futures* GT JSON, yet the message
        # refers to the spot GT and 0_ground_truth.py — confirm the wording.
        logging.error("현물 GT JSON 없음: %s — 먼저 0_ground_truth.py 실행", json_path)
        return 1
    return 0


if __name__ == "__main__":
    raise SystemExit(main())

View File

@@ -0,0 +1,221 @@
#!/usr/bin/env python3
"""1단계: Ground Truth 타점 기준 3년 수익 sim + sim 차트 생성."""
from __future__ import annotations
import argparse
import json
import logging
import sys
from dataclasses import replace
from pathlib import Path
from typing import Any
ROOT = Path(__file__).resolve().parents[1]
SRC = ROOT / "src"
if str(SRC) not in sys.path:
sys.path.insert(0, str(SRC))
from deepcoin.config import Settings, load_settings
from deepcoin.data.candle_loader import load_candles
from deepcoin.data.intervals import interval_label
from deepcoin.ground_truth.chart import render_ground_truth_sim_chart
from deepcoin.ground_truth.ground_truth import GtParams, build_ground_truth, save_ground_truth
from deepcoin.ground_truth.pnl import simulate_gt_signals_pnl
# Human-readable description of each Ground Truth tier, keyed by tier id;
# shown in the header line of the per-tier simulation summary.
TIER_DESCRIPTIONS = {
    "v1": "스윙만 (최소 매수·매도)",
    "v2": "스윙 + 눌림목",
    "v3": "스윙 + 눌림목 + 돌파 + 다이버전스",
}
def _configure_logging(verbose: bool) -> None:
    """Configure root logging: DEBUG when ``verbose`` is true, otherwise INFO."""
    logging.basicConfig(
        level=logging.DEBUG if verbose else logging.INFO,
        format="%(asctime)s [%(levelname)s] %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
    )
def _base_params(settings: Settings, args: argparse.Namespace) -> GtParams:
    """Build the GT parameters shared by every tier.

    CLI options take precedence over the matching ``settings`` values; all
    other fields are copied from ``settings``. ``chart_tier`` is not set here.

    Args:
        settings: Loaded settings providing the ``gt_*`` defaults.
        args: Parsed CLI namespace with ``interval``, ``days``, ``zigzag`` and
            ``min_leg`` (each ``None`` when the option was not given).

    Returns:
        The common ``GtParams`` instance.
    """
    # An explicit 0 for a percentage threshold must be honoured, so only an
    # omitted option (None) falls back to the configured value; `or` would
    # silently replace 0.0 with the default.
    zigzag_pct = settings.gt_zigzag_reversal_pct if args.zigzag is None else args.zigzag
    min_leg_pct = settings.gt_min_leg_pct if args.min_leg is None else args.min_leg
    return GtParams(
        # 0 is not a usable interval or day count, so `or` is kept for these.
        interval_min=args.interval or settings.gt_interval_min,
        lookback_days=args.days or settings.gt_lookback_days,
        zigzag_reversal_pct=zigzag_pct,
        min_leg_pct=min_leg_pct,
        pullback_min_pct=settings.gt_pullback_min_pct,
        pullback_local_order=settings.gt_pullback_local_order,
        breakout_buffer_pct=settings.gt_breakout_buffer_pct,
        breakout_consolidation_bars=settings.gt_breakout_consolidation_bars,
        breakout_min_rally_pct=settings.gt_breakout_min_rally_pct,
        div_local_order=settings.gt_div_local_order,
        div_min_bars_between=settings.gt_div_min_bars_between,
        div_min_rsi_diff=settings.gt_div_min_rsi_diff,
        div_min_future_move_pct=settings.gt_div_min_future_move_pct,
    )
def _tier_targets(settings: Settings) -> list[tuple[str, Path, Path, Path]]:
    """Return ``(tier, json, chart, sim_chart)`` paths for v1, v2 and v3, in that order."""
    json_files = (
        settings.ground_truth_v1_file,
        settings.ground_truth_v2_file,
        settings.ground_truth_file,
    )
    chart_files = (
        settings.ground_truth_chart_v1_file,
        settings.ground_truth_chart_v2_file,
        settings.ground_truth_chart_v3_file,
    )
    sim_chart_files = (
        settings.ground_truth_chart_sim_v1_file,
        settings.ground_truth_chart_sim_v2_file,
        settings.ground_truth_chart_sim_v3_file,
    )
    return list(zip(("v1", "v2", "v3"), json_files, chart_files, sim_chart_files))
def _load_or_build_gt(
    settings: Settings,
    params: GtParams,
    json_path: Path,
    rebuild: bool,
) -> dict[str, Any]:
    """Return the GT result, reusing the saved JSON unless a rebuild is needed.

    The GT is built (and saved to ``json_path``) when the file does not exist
    or ``rebuild`` is true; otherwise the existing JSON is loaded.
    """
    if not json_path.exists() or rebuild:
        built = build_ground_truth(
            db_path=settings.db_path,
            symbol=settings.symbol,
            coin_name=settings.coin_name,
            params=params,
            initial_cash_krw=settings.gt_initial_cash_krw,
            fee_rate=settings.gt_trading_fee_rate,
        )
        save_ground_truth(built, json_path)
        return built
    with json_path.open(encoding="utf-8") as handle:
        return json.load(handle)
def _print_sim_summary(
    tier: str,
    sim_pnl: dict[str, Any],
    sim_chart_path: Path,
) -> None:
    """Print a console digest of one tier's simulation result.

    Args:
        tier: Tier id, also used to look up ``TIER_DESCRIPTIONS``.
        sim_pnl: Simulation PnL mapping whose fields are printed.
        sim_chart_path: Path printed as the sim chart location.
    """
    print(f"\n=== 1단계 sim {tier.upper()} ({TIER_DESCRIPTIONS[tier]}) ===")

    period_line = (
        f"기간: {sim_pnl['period_from']} ~ {sim_pnl['period_to']} "
        f"({sim_pnl['sim_lookback_days']}일)"
    )
    print(period_line)

    equity_line = (
        f"초기 {sim_pnl['initial_cash_krw']:,.0f}원 → "
        f"최종 {sim_pnl['final_equity_krw']:,.0f}"
        f"({sim_pnl['total_return_pct']:+.2f}%)"
    )
    print(equity_line)

    holdings_line = (
        f"현금 {sim_pnl['final_cash_krw']:,.0f}원 + "
        f"코인 {sim_pnl['final_coin_qty']:.8f} "
        f"(평가 {sim_pnl['final_coin_value_krw']:,.0f}원)"
    )
    print(holdings_line)

    trades_line = (
        f"체결 매수 {sim_pnl['buys_executed']} / 매도 {sim_pnl['sells_executed']} | "
        f"스킵 매수 {sim_pnl['buys_skipped']} / 매도 {sim_pnl['sells_skipped']}"
    )
    print(trades_line)
    print(f"sim 차트: {sim_chart_path}")
def main() -> int:
    """CLI entry point for stage 1: simulate PnL on the GT signals and chart it.

    For each requested tier the GT JSON is loaded (or built), the signals are
    simulated over the last ``sim_days`` days, the result is stored back into
    the GT JSON under ``sim_pnl`` and a sim chart is rendered.

    Returns:
        0 on completion, 1 when no candles are available for the mark price.
    """
    parser = argparse.ArgumentParser(description="1단계: Ground Truth 3년 sim")
    parser.add_argument("--interval", type=int, default=None, help="GT 인터벌(분)")
    parser.add_argument("--days", type=int, default=None, help="GT 타점 기간(일). 기본 730")
    parser.add_argument(
        "--sim-days",
        type=int,
        default=None,
        help="시뮬 기간(일). 기본 GT_SIM_LOOKBACK_DAYS 또는 365",
    )
    parser.add_argument("--zigzag", type=float, default=None, help="ZigZag 되돌림 %%")
    parser.add_argument("--min-leg", type=float, default=None, help="최소 레그 수익률 %%")
    parser.add_argument(
        "--tier",
        choices=("v1", "v2", "v3", "all"),
        default="all",
        help="대상 GT 버전",
    )
    parser.add_argument(
        "--rebuild-gt",
        action="store_true",
        help="GT JSON을 다시 생성 (없으면 자동 생성)",
    )
    parser.add_argument("-v", "--verbose", action="store_true")
    args = parser.parse_args()

    _configure_logging(args.verbose)
    settings = load_settings()
    base = _base_params(settings, args)
    sim_days = args.sim_days or settings.gt_sim_lookback_days
    tiers = _tier_targets(settings)
    if args.tier != "all":
        tiers = [t for t in tiers if t[0] == args.tier]

    logging.info(
        "1단계 sim: %s %s, GT %s일, sim %s일, 초기=%s",
        settings.symbol,
        interval_label(base.interval_min),
        base.lookback_days,
        sim_days,
        f"{settings.gt_initial_cash_krw:,.0f}",
    )
    print("\n=== Ground Truth 1단계 sim ===")
    print(f"초기 자본: {settings.gt_initial_cash_krw:,.0f}원 | 시뮬 기간: 최근 {sim_days}")

    # Tiers differ only in chart_tier, so interval and lookback are identical
    # for all of them: load the candles once instead of once per tier.
    df = load_candles(
        db_path=settings.db_path,
        symbol=settings.symbol,
        interval_min=base.interval_min,
        lookback_days=base.lookback_days,
    )
    # Without any candle there is no last close to mark open positions with;
    # fail with a clear message rather than an IndexError from iloc[-1].
    if len(df) == 0:
        logging.error(
            "캔들 데이터 없음: %s %s — 먼저 00_download.py 실행",
            settings.symbol,
            interval_label(base.interval_min),
        )
        return 1
    last_close = float(df["close"].iloc[-1])

    for tier, json_path, _chart_path, sim_chart_path in tiers:
        params = replace(base, chart_tier=tier)
        gt_result = _load_or_build_gt(settings, params, json_path, args.rebuild_gt)
        sim_pnl = simulate_gt_signals_pnl(
            signals=gt_result.get("signals") or [],
            initial_cash_krw=settings.gt_initial_cash_krw,
            fee_rate=settings.gt_trading_fee_rate,
            sim_lookback_days=sim_days,
            data_end=gt_result["meta"]["data_to"],
            last_mark_price=last_close,
        )
        # Persist the simulation next to the GT so later stages can read it.
        gt_result["sim_pnl"] = sim_pnl
        save_ground_truth(gt_result, json_path)
        render_ground_truth_sim_chart(
            db_path=settings.db_path,
            symbol=settings.symbol,
            gt_result=gt_result,
            sim_pnl=sim_pnl,
            output_path=sim_chart_path,
            chart_lookback_days=settings.download_days,
        )
        _print_sim_summary(tier, sim_pnl, sim_chart_path)
    return 0


if __name__ == "__main__":
    raise SystemExit(main())

169
scripts/2_run_causal_sim.py Normal file
View File

@@ -0,0 +1,169 @@
#!/usr/bin/env python3
"""2단계: 인과 기법 sim + 차트 (1단계와 동일 거래 기간·초기 자본)."""
from __future__ import annotations
import argparse
import logging
import sys
import time
from pathlib import Path
ROOT = Path(__file__).resolve().parents[1]
SRC = ROOT / "src"
if str(SRC) not in sys.path:
sys.path.insert(0, str(SRC))
from deepcoin.config import load_settings
from deepcoin.data.candle_loader import load_candles
from deepcoin.data.intervals import interval_label
from deepcoin.evaluation.causal_sim import (
build_causal_sim_report,
render_causal_sim_html,
render_technique_sim_chart,
run_technique_causal_sim,
save_causal_sim_report,
technique_sim_chart_path,
)
from deepcoin.techniques.runner import load_ground_truth, load_technique_results
def _configure_logging(verbose: bool) -> None:
    """Set up root logging at DEBUG when ``verbose`` is true, otherwise INFO."""
    options = {
        "format": "%(asctime)s [%(levelname)s] %(message)s",
        "datefmt": "%Y-%m-%d %H:%M:%S",
    }
    options["level"] = logging.DEBUG if verbose else logging.INFO
    logging.basicConfig(**options)
def _print_progress(phase: str, current: int, total: int, detail: str) -> None:
    """Report progress for ``phase`` on stdout (flushed) and through ``logging.info``.

    The percentage is ``current / total``; a zero ``total`` is reported as 100%.
    """
    if total:
        pct = current / total * 100.0
    else:
        pct = 100.0
    line = f"[{phase}] {current}/{total} ({pct:.1f}%) — {detail}"
    print(line, flush=True)
    logging.info(line)
def main() -> int:
    """CLI entry point for stage 2: simulate each technique result and report.

    Loads the GT JSON and the saved technique results, runs one simulation per
    technique (optionally rendering a chart for each), then writes a summary
    report as JSON and HTML and prints the top three ranked techniques.

    Returns:
        0 on completion, 1 when no technique results could be loaded.
    """
    parser = argparse.ArgumentParser(description="2단계: 인과 기법 sim + 차트")
    parser.add_argument(
        "--techniques",
        type=str,
        default=None,
        help="대상 기법 ID (쉼표 구분). 기본: 전체",
    )
    parser.add_argument(
        "--no-charts",
        action="store_true",
        help="개별 sim 차트 HTML 생략 (요약 리포트만)",
    )
    parser.add_argument("-v", "--verbose", action="store_true")
    args = parser.parse_args()
    _configure_logging(args.verbose)
    settings = load_settings()
    gt_result = load_ground_truth(settings.ground_truth_file)
    gt_meta = gt_result.get("meta", {})
    # None means "no filter"; otherwise only the listed technique ids are loaded.
    technique_ids = None
    if args.techniques:
        technique_ids = [t.strip() for t in args.techniques.split(",") if t.strip()]
    results = load_technique_results(settings.techniques_dir, technique_ids)
    if not results:
        logging.error(
            "기법 결과 없음: %s — 먼저 2_run_techniques.py 실행",
            settings.techniques_dir,
        )
        return 1
    # The stage-1 simulation is only used as a benchmark line in the final
    # output, so its absence is a warning rather than an error.
    stage1_sim = gt_result.get("sim_pnl")
    if not stage1_sim:
        logging.warning("GT JSON에 1단계 sim_pnl 없음 — 1_ground_truth_sim.py 권장")
    df = load_candles(
        db_path=settings.db_path,
        symbol=settings.symbol,
        interval_min=settings.gt_interval_min,
        lookback_days=settings.gt_lookback_days,
    )
    # NOTE(review): iloc[-1] raises IndexError if no candles were loaded —
    # confirm whether load_candles can return an empty frame.
    last_close = float(df["close"].iloc[-1])
    data_end = gt_meta.get("data_to")
    # Per-technique charts are written next to the summary report JSON.
    analysis_dir = settings.causal_sim_report_json.parent
    print("\n=== 2단계 인과 sim ===", flush=True)
    print(
        f"거래 기간: 최근 {settings.gt_sim_lookback_days}일 | "
        f"초기 {settings.gt_initial_cash_krw:,.0f}원 | "
        f"기법 {len(results)}",
        flush=True,
    )
    # Simulation results keyed by technique id, consumed by the report builder.
    sim_pnls: dict[str, dict] = {}
    total = len(results)
    t0 = time.monotonic()
    for idx, result in enumerate(results, start=1):
        sim_pnl = run_technique_causal_sim(
            result,
            initial_cash_krw=settings.gt_initial_cash_krw,
            fee_rate=settings.gt_trading_fee_rate,
            sim_lookback_days=settings.gt_sim_lookback_days,
            data_end=data_end,
            last_mark_price=last_close,
        )
        sim_pnls[result.technique_id] = sim_pnl
        _print_progress(
            "sim",
            idx,
            total,
            f"{result.technique_id}{sim_pnl.get('total_return_pct', 0):+.2f}%",
        )
        if not args.no_charts:
            chart_path = technique_sim_chart_path(analysis_dir, result.technique_id)
            render_technique_sim_chart(
                db_path=settings.db_path,
                symbol=settings.symbol,
                gt_meta=gt_meta,
                result=result,
                sim_pnl=sim_pnl,
                output_path=chart_path,
                chart_lookback_days=settings.gt_sim_lookback_days,
            )
            _print_progress("chart", idx, total, str(chart_path.name))
    report = build_causal_sim_report(results, gt_result, settings.symbol, sim_pnls)
    # NOTE(review): this replaces any "meta" the report builder may have set —
    # confirm that is intended.
    report["meta"] = {
        "interval_label": interval_label(settings.gt_interval_min),
        "initial_cash_krw": settings.gt_initial_cash_krw,
        "sim_lookback_days": settings.gt_sim_lookback_days,
    }
    json_path = save_causal_sim_report(report, settings.causal_sim_report_json)
    html_path = render_causal_sim_html(report, settings.causal_sim_report_html)
    # Elapsed time covers the simulations, charts and report generation.
    elapsed = time.monotonic() - t0
    print(f"\n=== 2단계 인과 sim 완료 ({elapsed/60:.1f}분) ===", flush=True)
    if stage1_sim:
        print(
            f"1단계 벤치마크(v3): {stage1_sim.get('total_return_pct', 0):+.2f}% "
            f"({stage1_sim.get('period_from', '')} ~ {stage1_sim.get('period_to', '')})",
            flush=True,
        )
    # Only the first three ranking rows are echoed to the console.
    top = report["ranking"][:3]
    for i, row in enumerate(top, start=1):
        print(
            f" {i}. {row['technique_name']}: {row['sim_return_pct']:+.2f}% "
            f"(GT정합 {row['gt_align_score']*100:.1f})",
            flush=True,
        )
    print(f"요약 JSON: {json_path}", flush=True)
    print(f"요약 HTML: {html_path}", flush=True)
    return 0
if __name__ == "__main__":
    raise SystemExit(main())

View File

@@ -0,0 +1,130 @@
#!/usr/bin/env python3
"""2단계: GT v3 타점 · 멀티 TF 피처 상관 분석."""
from __future__ import annotations
import argparse
import logging
import sys
from pathlib import Path
ROOT = Path(__file__).resolve().parents[1]
SRC = ROOT / "src"
if str(SRC) not in sys.path:
sys.path.insert(0, str(SRC))
from deepcoin.config import load_settings
from deepcoin.evaluation.mtf_report import (
build_mtf_correlation_report,
render_mtf_html,
save_mtf_report,
)
from deepcoin.mtf.extractor import MtfFeatureExtractor
from deepcoin.mtf.store import MultiTimeframeStore
from deepcoin.techniques.runner import load_ground_truth
def _configure_logging(verbose: bool) -> None:
    """Initialise root logging; ``verbose`` selects DEBUG instead of INFO."""
    if verbose:
        threshold = logging.DEBUG
    else:
        threshold = logging.INFO
    logging.basicConfig(
        format="%(asctime)s [%(levelname)s] %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
        level=threshold,
    )
def _resolve_gt_path(settings, gt_file: str | None) -> Path:
    """Pick the GT JSON path to analyse.

    Without ``gt_file`` the configured ``settings.ground_truth_file`` is used.
    An absolute ``gt_file`` is returned as is; a relative one is resolved
    against the repository ``ROOT``.
    """
    if not gt_file:
        return settings.ground_truth_file
    candidate = Path(gt_file)
    return candidate if candidate.is_absolute() else ROOT / candidate
def main() -> int:
    """CLI entry point: correlate multi-timeframe features with GT signals.

    Builds the MTF correlation report for the selected GT JSON, saves it as
    JSON and HTML, and prints the five top-ranked features.

    Returns:
        0 on completion, 1 when the GT JSON file does not exist.
    """
    parser = argparse.ArgumentParser(description="2단계: GT v3 MTF 피처 상관 분석")
    parser.add_argument("--gt-file", type=str, default=None, help="GT JSON (기본 v3)")
    parser.add_argument(
        "--days",
        type=int,
        default=None,
        help="분석 구간(일). 기본 GT_SIM_LOOKBACK_DAYS",
    )
    parser.add_argument(
        "--negative-samples",
        type=int,
        default=2000,
        help="음성 샘플 3분봉 수",
    )
    parser.add_argument(
        "--exclude-bars",
        type=int,
        default=60,
        help="GT 주변 제외 3분봉 수",
    )
    parser.add_argument("--seed", type=int, default=42, help="음성 샘플 RNG seed")
    parser.add_argument("-v", "--verbose", action="store_true")
    args = parser.parse_args()

    _configure_logging(args.verbose)
    settings = load_settings()
    gt_path = _resolve_gt_path(settings, args.gt_file)
    # Fail early with a hint about the prerequisite step when the GT JSON is
    # missing, instead of surfacing whatever the loader raises.
    if not gt_path.exists():
        logging.error("Ground Truth 파일 없음: %s — 먼저 0_ground_truth.py 실행", gt_path)
        return 1
    lookback_days = args.days or settings.gt_sim_lookback_days
    logging.info(
        "MTF 상관 분석: %s, 최근 %d일, 음성 %d",
        gt_path.name,
        lookback_days,
        args.negative_samples,
    )

    gt_result = load_ground_truth(gt_path)
    store = MultiTimeframeStore(
        db_path=settings.db_path,
        symbol=settings.symbol,
        # NOTE(review): the extra 120 days are presumably warm-up history for
        # the feature calculations — confirm against MultiTimeframeStore.
        lookback_days=lookback_days + 120,
        zigzag_reversal_pct=settings.gt_zigzag_reversal_pct,
    )
    extractor = MtfFeatureExtractor(
        store=store,
        base_interval_min=settings.gt_interval_min,
    )
    report = build_mtf_correlation_report(
        gt_result=gt_result,
        extractor=extractor,
        lookback_days=lookback_days,
        negative_sample_count=args.negative_samples,
        exclude_bars=args.exclude_bars,
        seed=args.seed,
    )
    json_path = save_mtf_report(report, settings.mtf_report_json)
    html_path = render_mtf_html(report, settings.mtf_report_html)

    gt = report.get("gt", {})
    # Only the first five ranking rows are echoed to the console.
    top = (report.get("global_feature_ranking") or [])[:5]
    print("\n=== GT v3 MTF 상관 분석 ===")
    print(f"구간: {report['analysis']['period_from']} ~ {report['analysis']['period_to']}")
    print(
        f"GT 신호 {gt.get('signals_in_period', 0)}건 · "
        f"스냅샷 {gt.get('snapshots_extracted', 0)}건 · "
        f"음성 {report['analysis']['negative_sample_count']}"
    )
    print("\n상위 피처 (|Cohen's d|):")
    for row in top:
        print(
            f" {row['signal_label']} | {row['timeframe']} | {row['feature']} | "
            f"d={row['cohens_d']}"
        )
    print(f"\nJSON: {json_path}")
    print(f"HTML: {html_path}")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())

View File

@@ -0,0 +1,189 @@
#!/usr/bin/env python3
"""2단계: v3 신호 유형별 인과 기법 정합 (B/B*/B^/Bd/S/Sd)."""
from __future__ import annotations
import argparse
import logging
import sys
from pathlib import Path
ROOT = Path(__file__).resolve().parents[1]
SRC = ROOT / "src"
if str(SRC) not in sys.path:
sys.path.insert(0, str(SRC))
from deepcoin.config import load_settings
from deepcoin.data.intervals import interval_label
from deepcoin.evaluation.report import (
build_comparison_report,
render_comparison_html,
save_comparison_report,
)
from deepcoin.evaluation.signal_type_report import (
build_signal_type_report,
render_signal_type_html,
save_signal_type_report,
)
from deepcoin.techniques.base import TechniqueParams
from deepcoin.techniques.registry import list_technique_ids
from deepcoin.techniques.runner import (
load_ground_truth,
load_technique_results,
run_all_techniques,
save_technique_result,
)
def _configure_logging(verbose: bool) -> None:
    """Configure root logging: DEBUG when ``verbose`` is true, otherwise INFO."""
    logging.basicConfig(
        level=logging.DEBUG if verbose else logging.INFO,
        format="%(asctime)s [%(levelname)s] %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
    )
def _resolve_gt_path(settings, gt_file: str | None) -> Path:
    """Pick the GT JSON path to use.

    Without ``gt_file`` the configured ``settings.ground_truth_file`` is used.
    An absolute ``gt_file`` is returned as is; a relative one is resolved
    against the repository ``ROOT``.
    """
    if not gt_file:
        return settings.ground_truth_file
    requested = Path(gt_file)
    if requested.is_absolute():
        return requested
    return ROOT / requested
def main() -> int:
    """CLI entry point: per-signal-type alignment of techniques against the GT.

    Technique results are either loaded from saved JSON (``--from-cache``) or
    produced by running the techniques (each result is saved as it arrives).
    A signal-type report is always written; the overall comparison report is
    written unless ``--no-comparison`` is given.

    Returns:
        0 on completion, 1 when the GT file is missing or no technique
        results are available.
    """
    parser = argparse.ArgumentParser(description="2단계: v3 신호 유형별 인과 정합")
    parser.add_argument(
        "--gt-file",
        type=str,
        default=None,
        help="Ground Truth JSON 경로 (기본: GROUND_TRUTH_FILE 또는 v3)",
    )
    parser.add_argument(
        "--techniques",
        type=str,
        default=None,
        help="실행할 기법 ID (쉼표 구분). 기본: 전체 (composite_v3 포함)",
    )
    parser.add_argument("--tolerance", type=int, default=None, help="GT 정합 허용 봉 수")
    parser.add_argument("--no-comparison", action="store_true", help="종합 비교 리포트 생략")
    parser.add_argument(
        "--from-cache",
        action="store_true",
        help="data/spot/techniques/ 저장 JSON에서 로드 (재실행 생략)",
    )
    parser.add_argument("-v", "--verbose", action="store_true")
    args = parser.parse_args()

    _configure_logging(args.verbose)
    settings = load_settings()
    gt_path = _resolve_gt_path(settings, args.gt_file)
    if not gt_path.exists():
        logging.error("Ground Truth 파일 없음: %s — 먼저 0_ground_truth.py 실행", gt_path)
        return 1
    gt_result = load_ground_truth(gt_path)
    chart_tier = gt_result.get("meta", {}).get("chart_tier", "unknown")
    if chart_tier not in ("v2", "v3"):
        logging.warning(
            "GT chart_tier=%s — 신호 유형 분석은 v2/v3에서 의미 있습니다.", chart_tier
        )

    # None means "no filter"; otherwise only the listed technique ids are used.
    technique_ids = None
    if args.techniques:
        technique_ids = [t.strip() for t in args.techniques.split(",") if t.strip()]
    params = TechniqueParams(
        interval_min=settings.gt_interval_min,
        lookback_days=settings.gt_lookback_days,
        min_leg_pct=settings.gt_min_leg_pct,
        initial_cash_krw=settings.gt_initial_cash_krw,
        fee_rate=settings.gt_trading_fee_rate,
        extra={"reversal_pct": settings.gt_zigzag_reversal_pct},
    )
    # `--tolerance 0` (exact-bar match) is a valid request, so only an omitted
    # option falls back to the configured value; `or` would discard the 0.
    tolerance = (
        settings.gt_align_tolerance_bars if args.tolerance is None else args.tolerance
    )
    logging.info(
        "신호 유형 정합: %s %s, %s일, GT=%s, tolerance=%s",
        settings.symbol,
        interval_label(params.interval_min),
        params.lookback_days,
        gt_path.name,
        tolerance,
    )

    results = (
        load_technique_results(settings.techniques_dir, technique_ids)
        if args.from_cache
        else run_all_techniques(
            db_path=settings.db_path,
            symbol=settings.symbol,
            params=params,
            gt_result=gt_result,
            tolerance_bars=tolerance,
            technique_ids=technique_ids,
            on_result=lambda result: save_technique_result(result, settings.techniques_dir),
        )
    )
    if not results:
        logging.error("기법 결과 없음 — 먼저 2_run_techniques.py 실행")
        return 1

    for result in results:
        align = result.alignment or {}
        by_type = align.get("by_signal_type") or {}
        # Recall per signal type, limited to types that occur in the GT.
        type_hits = [
            f"{signal_type}:{type_align['recall']*100:.0f}%"
            for signal_type, type_align in by_type.items()
            if type_align.get("gt_count", 0) > 0
        ]
        # Show at most four types per line; "..." marks a truncated list.
        type_summary = ", ".join(type_hits[:4])
        if len(type_hits) > 4:
            type_summary += "..."
        print(
            f" [{result.technique_id}] score={align.get('score', 0)*100:.1f} "
            f"| {type_summary or 'no types'}"
        )

    signal_report = build_signal_type_report(results, gt_result, settings.symbol)
    st_json = save_signal_type_report(signal_report, settings.signal_type_report_json)
    st_html = render_signal_type_html(signal_report, settings.signal_type_report_html)
    print("\n=== v3 신호 유형별 최고 Recall ===")
    for row in signal_report.get("best_by_signal_type", []):
        print(
            f" {row['signal_label']}: "
            f"{row['best_technique_name']} {row['best_recall']*100:.0f}% "
            f"({row['best_technique_id']}, GT {row['gt_count']}건)"
        )
    print(f"신호 유형 리포트 JSON: {st_json}")
    print(f"신호 유형 리포트 HTML: {st_html}")

    if not args.no_comparison:
        comparison = build_comparison_report(results, gt_result, settings.symbol)
        cmp_json = save_comparison_report(comparison, settings.analysis_report_json)
        cmp_html = render_comparison_html(comparison, settings.analysis_report_html)
        print("\n=== GT 정합 순위 (상위 3) ===")
        for idx, row in enumerate(comparison["ranking"][:3], start=1):
            print(
                f" {idx}. {row['technique_name']}: "
                f"score {row['score']*100:.1f}, leg recall {row['leg_recall']*100:.0f}%"
            )
        print(f"비교 리포트 JSON: {cmp_json}")
        print(f"비교 리포트 HTML: {cmp_html}")
    print(f"\n등록 기법: {', '.join(list_technique_ids())}")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())

23
scripts/2_run_stage2_all.sh Executable file
View File

@@ -0,0 +1,23 @@
#!/usr/bin/env bash
# Spot stage 2, end to end: run techniques -> causal sim -> signal-type alignment -> MTF analysis.
# PY (interpreter) and LOG (log file) can be overridden from the environment.
set -euo pipefail

# Run from the repository root (the parent of this script's directory).
cd "$(dirname "$0")/.."
export PYTHONPATH=src

PY="${PY:-/opt/anaconda3/envs/ncue/bin/python}"
LOG="${LOG:-/tmp/deepcoin_stage2.log}"

# Timestamp in the format used by the start/end banners.
now() {
  date '+%Y-%m-%d %H:%M:%S'
}

# Print a message to stdout and append it to the log file.
say() {
  echo "$1" | tee -a "$LOG"
}

# Announce a step, then run the given script (and its arguments) with
# stdout+stderr mirrored into the log. A failing step aborts the whole
# pipeline because of `set -e` together with `pipefail`.
run_step() {
  local banner="$1"
  shift
  say "$banner"
  "$PY" "$@" 2>&1 | tee -a "$LOG"
}

# The opening banner uses plain `tee` so a previous log is truncated;
# everything afterwards appends.
echo "=== 현물 2단계 파이프라인 시작 $(now) ===" | tee "$LOG"

run_step "[1/4] 기법 실행 (39종)..." scripts/2_run_techniques.py
run_step "[2/4] 인과 sim + 차트..." scripts/2_run_causal_sim.py
run_step "[3/4] 신호 유형별 정합..." scripts/2_run_signal_type_align.py --from-cache
run_step "[4/4] MTF 상관 분석..." scripts/2_run_mtf_analysis.py

say "=== 현물 2단계 완료 $(now) ==="

158
scripts/2_run_techniques.py Normal file
View File

@@ -0,0 +1,158 @@
#!/usr/bin/env python3
"""2단계: 인과 기법 GT 정합 분석 (과거 데이터만 · 0단계 타점 대비)."""
from __future__ import annotations
import argparse
import logging
import sys
from pathlib import Path
ROOT = Path(__file__).resolve().parents[1]
SRC = ROOT / "src"
if str(SRC) not in sys.path:
sys.path.insert(0, str(SRC))
from deepcoin.config import load_settings
from deepcoin.data.intervals import interval_label
from deepcoin.evaluation.report import (
build_comparison_report,
render_comparison_html,
save_comparison_report,
)
from deepcoin.techniques.base import TechniqueParams
from deepcoin.techniques.registry import list_technique_ids
from deepcoin.techniques.runner import (
load_ground_truth,
run_all_techniques,
save_technique_result,
)
def _configure_logging(verbose: bool) -> None:
    """Configure root logging for this CLI.

    Args:
        verbose: When true the level is DEBUG; otherwise INFO.
    """
    if verbose:
        chosen_level = logging.DEBUG
    else:
        chosen_level = logging.INFO
    log_options = {
        "level": chosen_level,
        "format": "%(asctime)s [%(levelname)s] %(message)s",
        "datefmt": "%Y-%m-%d %H:%M:%S",
    }
    logging.basicConfig(**log_options)
def main() -> int:
    """Run the stage-2 causal-technique / ground-truth alignment CLI.

    Parses the command line, loads settings and the ground-truth file, runs
    the selected techniques through ``run_all_techniques`` (saving each result
    from the progress callback as it arrives), prints a per-technique summary
    and, unless ``--no-report`` is given, builds, saves and renders the
    comparison report.

    Returns:
        ``1`` when the ground-truth file does not exist, otherwise ``0``.
    """
    parser = argparse.ArgumentParser(description="2단계: 인과 기법 GT 정합 분석")
    parser.add_argument(
        "--techniques",
        type=str,
        default=None,
        help="실행할 기법 ID (쉼표 구분). 기본: 전체",
    )
    parser.add_argument("--tolerance", type=int, default=None, help="GT 정합 허용 봉 수")
    parser.add_argument("--no-report", action="store_true", help="비교 리포트 생략")
    parser.add_argument("-v", "--verbose", action="store_true")
    args = parser.parse_args()

    _configure_logging(args.verbose)
    settings = load_settings()

    # Nothing can be aligned without the stage-0 ground truth.
    if not settings.ground_truth_file.exists():
        logging.error(
            "Ground Truth 파일 없음: %s — 먼저 0_ground_truth.py 실행",
            settings.ground_truth_file,
        )
        return 1

    gt_result = load_ground_truth(settings.ground_truth_file)

    # None is passed through to run_all_techniques when no filter was given.
    technique_ids = None
    if args.techniques:
        technique_ids = [t.strip() for t in args.techniques.split(",") if t.strip()]

    params = TechniqueParams(
        interval_min=settings.gt_interval_min,
        lookback_days=settings.gt_lookback_days,
        min_leg_pct=settings.gt_min_leg_pct,
        initial_cash_krw=settings.gt_initial_cash_krw,
        fee_rate=settings.gt_trading_fee_rate,
        extra={"reversal_pct": settings.gt_zigzag_reversal_pct},
    )

    # Compare against None explicitly: `args.tolerance or default` would treat
    # an explicit `--tolerance 0` as "not given" and silently use the default.
    if args.tolerance is not None:
        tolerance = args.tolerance
    else:
        tolerance = settings.gt_align_tolerance_bars

    logging.info(
        "기법 실행: %s %s, %s일, tolerance=%s",
        settings.symbol,
        interval_label(params.interval_min),
        params.lookback_days,
        tolerance,
    )

    technique_count = len(technique_ids) if technique_ids else len(list_technique_ids())
    print(
        f"\n=== 2단계 기법 실행 시작 ({technique_count}종) ===",
        flush=True,
    )

    completed = 0

    def _on_result(result) -> None:
        """Save one finished technique result and print a progress line."""
        nonlocal completed
        save_technique_result(result, settings.techniques_dir)
        completed += 1
        # Guard the division in case the technique count is zero.
        pct = completed / technique_count * 100.0 if technique_count else 100.0
        align = result.alignment or {}
        print(
            f"[기법] {completed}/{technique_count} ({pct:.1f}%) — "
            f"{result.technique_id} score={align.get('score', 0)*100:.1f}",
            flush=True,
        )

    results = run_all_techniques(
        db_path=settings.db_path,
        symbol=settings.symbol,
        params=params,
        gt_result=gt_result,
        tolerance_bars=tolerance,
        technique_ids=technique_ids,
        on_result=_on_result,
    )

    saved_paths: list[Path] = []
    for result in results:
        # NOTE(review): presumably the file name save_technique_result writes
        # in the callback above — confirm against its implementation.
        path = settings.techniques_dir / f"{result.technique_id}.json"
        saved_paths.append(path)
        align = result.alignment or {}
        # `or {}` also covers a "legs" key that is present but None.
        legs = align.get("legs") or {}
        print(
            f" [{result.technique_id}] {result.technique_name}: "
            f"레그 {result.summary.get('leg_count', 0)}개, "
            f"수익 {result.pnl.get('total_return_pct', 0):+.1f}%, "
            f"GT정합 score={align.get('score', 0)*100:.1f} "
            f"(leg recall {legs.get('leg_recall', 0)*100:.0f}%)"
        )

    print(f"\n=== 2단계 인과 정합 분석 완료 ({len(results)}개) ===")
    print(f"저장: {settings.techniques_dir}/")
    for path in saved_paths:
        print(f" - {path.name}")

    if not args.no_report:
        report = build_comparison_report(results, gt_result, settings.symbol)
        json_path = save_comparison_report(report, settings.analysis_report_json)
        html_path = render_comparison_html(report, settings.analysis_report_html)
        print("\n=== GT 정합 순위 (상위 3) ===")
        gt_return = report["gt"]["return_pct"]
        print(f"GT 벤치마크: {gt_return:+.1f}%")
        # Only the top three rows of the ranking are printed.
        for idx, row in enumerate(report["ranking"][:3], start=1):
            print(
                f" {idx}. {row['technique_name']}: "
                f"score {row['score']*100:.1f}, "
                f"수익 {row['tech_return_pct']:+.1f}%, "
                f"leg recall {row['leg_recall']*100:.0f}%"
            )
        print(f"리포트 JSON: {json_path}")
        print(f"리포트 HTML: {html_path}")

    print(f"\n등록 기법: {', '.join(list_technique_ids())}")
    return 0
if __name__ == "__main__":
    # Hand main()'s integer return value to the interpreter as the exit status.
    exit_status = main()
    raise SystemExit(exit_status)