hybrid DD tier와 Option C 2차(+1000%) 검증을 추가하고 실거래 사이징을 정합한다.

인과 GT leg 엔진·drawdown tier·train 캘리브레이션, Phase 2 Go/No-Go 및 시뮬 리포트를 반영한다.

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
xavis
2026-06-01 16:09:18 +09:00
parent 9b00ef34c6
commit d385456867
21 changed files with 3315 additions and 1178 deletions

View File

@@ -0,0 +1,173 @@
"""
인과 GT leg 엔진 파라미터 그리드 탐색·최적 저장.
"""
from __future__ import annotations
import json
from itertools import product
from pathlib import Path
from typing import Any
from config import (
CHART_LOOKBACK_DAYS,
GT_BUY_BB_MAX,
GT_BUY_MIN_SWING_PCT,
GT_MIN_SWING_PCT,
GT_PIVOT_ORDER,
MATCH_PRIMARY_INTERVAL,
SYMBOL,
)
from deepcoin.data.mtf_bb import load_frames_from_db
from deepcoin.ground_truth.causal_gt_trades import simulate_causal_gt_portfolio
from deepcoin.ground_truth.ground_truth import load_ground_truth
from deepcoin.ops.monitor import Monitor
from deepcoin.paths import MATCHING_CAUSAL_GT_CALIBRATION_JSON, resolve_ground_truth_file
def default_causal_gt_params() -> dict[str, Any]:
    """
    Return the config-backed default parameters of the causal GT leg engine.

    Returns:
        Keyword arguments accepted by build_causal_split_buy_peak_sell_trades.
    """
    # Imported at call time, exactly as the original does.
    from config import (
        CAUSAL_GT_MIN_BARS_BETWEEN_LEGS,
        CAUSAL_GT_MIN_LEG_PCT,
        CAUSAL_GT_PEAK_MODE,
        CAUSAL_GT_USE_LOCAL_TROUGH,
    )

    return dict(
        pivot_order=GT_PIVOT_ORDER,
        buy_swing_pct=GT_BUY_MIN_SWING_PCT,
        sell_swing_pct=GT_MIN_SWING_PCT,
        bb_max=GT_BUY_BB_MAX,
        min_leg_pct=CAUSAL_GT_MIN_LEG_PCT,
        use_local_trough=CAUSAL_GT_USE_LOCAL_TROUGH,
        peak_mode=CAUSAL_GT_PEAK_MODE,
        min_bars_between_legs=CAUSAL_GT_MIN_BARS_BETWEEN_LEGS,
    )
def load_causal_gt_params(path: Path | None = None) -> dict[str, Any]:
    """
    Load calibrated engine parameters, falling back to the config defaults.

    Args:
        path: Calibration JSON path. When None (or falsy),
            MATCHING_CAUSAL_GT_CALIBRATION_JSON is used.

    Returns:
        A copy of the stored "best_params" (or, failing that, "params") mapping.
        If the file does not exist or holds neither entry, the result of
        default_causal_gt_params() is returned instead.
    """
    source = path or MATCHING_CAUSAL_GT_CALIBRATION_JSON
    if not source.is_file():
        return default_causal_gt_params()
    payload = json.loads(source.read_text(encoding="utf-8"))
    chosen = payload.get("best_params") or payload.get("params")
    if not chosen:
        return default_causal_gt_params()
    return dict(chosen)
def _grid_space() -> dict[str, list[Any]]:
    """
    Return the calibration search grid for the causal GT leg engine.

    The Cartesian product of the axes below has
    2 * 4 * 3 * 2 * 3 * 3 * 2 * 2 = 1,728 combinations.

    Returns:
        Mapping of engine parameter name to its list of candidate values.
    """
    axes: list[tuple[str, list[Any]]] = [
        ("peak_mode", ["local", "zigzag"]),
        ("pivot_order", [8, 10, 12, 15]),
        ("buy_swing_pct", [2.0, 2.5, 3.0]),
        ("sell_swing_pct", [3.0, 4.0]),
        ("bb_max", [0.55, 0.65, 0.75]),
        ("min_leg_pct", [3.0, 5.0, 8.0]),
        ("min_bars_between_legs", [60, 90]),
        ("use_local_trough", [True, False]),
    ]
    return dict(axes)
def run_causal_gt_calibration(
    *,
    min_trades: int = 30,
    top_n: int = 20,
    out_path: Path | None = None,
) -> dict[str, Any]:
    """
    Grid-search the causal GT leg engine and persist the best parameters as JSON.

    Every combination from _grid_space() is simulated on the primary-interval
    frame loaded from the DB. Combinations are ranked by PnL % (descending).

    Args:
        min_trades: Combinations with fewer trades than this are discarded.
        top_n: Number of top-ranked combinations stored under "top".
        out_path: Output JSON path; defaults to MATCHING_CAUSAL_GT_CALIBRATION_JSON.

    Returns:
        The calibration report dict that was written to disk.
    """
    target_pnl_pct = 300.0

    # Reference numbers from the stored ground-truth summary (0 when absent).
    gt_summary = (load_ground_truth(resolve_ground_truth_file()) or {}).get("summary") or {}
    mark = float(gt_summary.get("mark_price") or 0)
    gt_pnl = float(gt_summary.get("pnl_pct") or 0)

    monitor = Monitor(cooldown_file=None)
    frames = load_frames_from_db(monitor, SYMBOL, lookback_days=CHART_LOOKBACK_DAYS)
    ohlc = frames[MATCH_PRIMARY_INTERVAL].copy()

    space = _grid_space()
    names = list(space)
    total = 1
    for values in space.values():
        total *= len(values)
    print(f"[causal_gt] 그리드 {total} 조합 탐색...")

    ranked: list[dict[str, Any]] = []
    for done, combo in enumerate(product(*space.values()), start=1):
        params = dict(zip(names, combo))
        # A zero mark price means "unknown": let the simulator pick its own mark.
        sim = simulate_causal_gt_portfolio(ohlc, last_price=mark or None, **params)
        trade_count = int(sim.get("trade_count") or 0)
        if done % 200 == 0:
            print(f" ... {done}/{total}")
        if trade_count < min_trades:
            continue
        pnl = float(sim.get("pnl_pct") or 0)
        ranked.append(
            {
                "pnl_pct": round(pnl, 2),
                "trade_count": trade_count,
                "leg_count": sim.get("leg_count", 0),
                "max_drawdown_pct": sim.get("max_drawdown_pct"),
                # Share of the ground-truth PnL captured; 0 when GT PnL is unknown.
                "capture_ratio": round(pnl / gt_pnl, 4) if gt_pnl else 0,
                "params": params,
            }
        )
    ranked.sort(key=lambda entry: entry["pnl_pct"], reverse=True)
    winner = ranked[0] if ranked else None

    report: dict[str, Any] = {
        "symbol": SYMBOL,
        "interval_min": MATCH_PRIMARY_INTERVAL,
        "gt_pnl_pct": gt_pnl,
        "grid_combinations": total,
        "valid_combinations": len(ranked),
        "min_trades": min_trades,
        "best": winner,
        "best_params": winner["params"] if winner else default_causal_gt_params(),
        "top": ranked[:top_n],
        "target_pnl_pct": target_pnl_pct,
        "target_met": bool(winner and winner["pnl_pct"] >= target_pnl_pct),
    }

    destination = out_path or MATCHING_CAUSAL_GT_CALIBRATION_JSON
    destination.parent.mkdir(parents=True, exist_ok=True)
    destination.write_text(
        json.dumps(report, ensure_ascii=False, indent=2), encoding="utf-8"
    )
    print(f"[causal_gt] 저장: {destination}")

    if not winner:
        print("[causal_gt] 유효 조합 없음")
        return report
    print(
        f"[causal_gt] 최적 PnL={winner['pnl_pct']}% "
        f"trades={winner['trade_count']} legs={winner['leg_count']} "
        f"capture={winner.get('capture_ratio', 0):.2%}"
    )
    return report

View File

@@ -0,0 +1,447 @@
"""
Phase 3: monitor 발화 + drawdown/past-leg tier (인과적).
매도는 monitor(sell_mtf_cross) 유지, tier만 drawdown·과거 leg 수익으로 강화합니다.
"""
from __future__ import annotations
from typing import Any
import pandas as pd
from config import (
CAUSAL_GT_DD_LARGE_PCT,
CAUSAL_GT_DD_MEDIUM_PCT,
GT_BUY_PCT_LARGE_LEG,
GT_BUY_PCT_MEDIUM_LEG,
GT_BUY_PCT_SMALL_LEG,
GT_INITIAL_CASH_KRW,
SIM_TIER_CONVICTION_DD_PCT,
TRADING_FEE_RATE,
)
from deepcoin.ground_truth.gt_allocation import (
allocate_order_amounts_chronological,
simulate_portfolio_summary,
)
from deepcoin.matching.portfolio_sim import sort_fires_chronological
from deepcoin.matching.position_sizing import enrich_sim_trades_with_gt_weights
def _deduped_ohlc(df: pd.DataFrame) -> pd.DataFrame:
    """
    Sort an OHLC frame by index and drop duplicated index entries.

    Args:
        df: OHLC DataFrame.

    Returns:
        The input itself when it is empty; otherwise an index-sorted frame in
        which, for each duplicated index value, only the last row (after
        sorting) is kept.
    """
    if df.empty:
        return df
    ordered = df.sort_index()
    if ordered.index.is_unique:
        return ordered
    repeated = ordered.index.duplicated(keep="last")
    return ordered[~repeated]
def _close_series_from_df(df: pd.DataFrame) -> pd.Series:
    """
    Extract the closing prices as a float Series with a positional index.

    Args:
        df: OHLC frame with a "close" or "Close" column ("close" wins if both
            are present).

    Returns:
        Closing prices of the de-duplicated, index-sorted frame, re-indexed
        0..n-1. An empty float Series when df is empty.

    Raises:
        KeyError: Neither "close" nor "Close" exists in a non-empty frame.
    """
    if df.empty:
        return pd.Series(dtype=float)
    frame = _deduped_ohlc(df)
    column = next((name for name in ("close", "Close") if name in frame.columns), None)
    if column is None:
        raise KeyError("OHLC DataFrame에 close/Close 컬럼이 없습니다.")
    return frame[column].astype(float).reset_index(drop=True)
def _bar_index_at(df: pd.DataFrame, dt: str) -> int:
    """
    Return the positional bar index in the de-duplicated frame nearest to dt.

    Args:
        df: OHLC frame with a DatetimeIndex.
        dt: Timestamp string.

    Returns:
        Integer bar position (>= 0). Falls back to 0 when the frame is empty,
        when dt cannot be parsed, or when dt parses to NaT.
    """
    frame = _deduped_ohlc(df)
    if frame.empty:
        return 0
    try:
        ts = pd.to_datetime(dt)
    except (TypeError, ValueError):
        return 0
    # pd.to_datetime("") yields NaT instead of raising, so a missing "dt" would
    # bypass the except-branch above; treat it as unparseable explicitly rather
    # than depending on how get_indexer handles a NaT target.
    if pd.isna(ts):
        return 0
    # NOTE(review): "nearest" can resolve to the bar just after dt when dt falls
    # between two bars; confirm that is acceptable for the causal drawdown lookup.
    pos = int(frame.index.get_indexer([ts], method="nearest")[0])
    return max(pos, 0)
def _drawdown_pct_at_index(closes: pd.Series, idx: int) -> float:
    """
    Drawdown (%) of the close at position idx versus the running high so far.

    Only closes[0..idx] are inspected, so no later bar influences the result.

    Args:
        closes: Closing prices, addressed positionally.
        idx: Bar position.

    Returns:
        (peak - current) / peak * 100, floored at 0. Returns 0.0 when idx is
        out of range or when the running high is not positive.
    """
    if not 0 <= idx < len(closes):
        return 0.0
    window = closes.iloc[: idx + 1].astype(float)
    if window.empty:
        return 0.0
    running_high = float(window.max())
    current = float(window.iloc[-1])
    if running_high <= 0:
        return 0.0
    decline = (running_high - current) / running_high * 100.0
    return max(decline, 0.0)
def hybrid_tier_scale(
    trade: dict[str, Any],
    *,
    completed_leg_ret: dict[int, float],
    enhanced: bool = False,
    dd_large_pct: float | None = None,
    dd_medium_pct: float | None = None,
) -> float:
    """
    Pick the asset-percentage scale of a buy from past-leg and drawdown tiers.

    Side effect: in enhanced mode the function may set
    trade["conviction_buy"] = True on the passed dict.

    Args:
        trade: Buy trade dict; "leg_id" and "drawdown_pct" are read.
        completed_leg_ret: Realized return (%) of legs that are already closed.
        enhanced: Enables the dedicated medium tier and the conviction flag.
        dd_large_pct: Drawdown threshold (%) of the large tier; None uses
            CAUSAL_GT_DD_LARGE_PCT.
        dd_medium_pct: Drawdown threshold (%) of the medium tier; None uses
            CAUSAL_GT_DD_MEDIUM_PCT.

    Returns:
        The asset_pct_scale to apply to this buy.
    """
    from config import GT_LARGE_LEG_TOP_PCT
    from deepcoin.matching.position_sizing import large_leg_ids_from_past_returns

    large_threshold = float(
        CAUSAL_GT_DD_LARGE_PCT if dd_large_pct is None else dd_large_pct
    )
    medium_threshold = float(
        CAUSAL_GT_DD_MEDIUM_PCT if dd_medium_pct is None else dd_medium_pct
    )
    leg = int(trade.get("leg_id", 0))
    past_large = large_leg_ids_from_past_returns(completed_leg_ret, GT_LARGE_LEG_TOP_PCT)
    drawdown = float(trade.get("drawdown_pct") or 0.0)

    # Large tier: the leg is among the past large legs, or the drawdown is deep.
    in_past_large = leg in past_large
    if in_past_large or drawdown >= large_threshold:
        # A past-large leg additionally needs the conviction drawdown to be
        # flagged; a deep drawdown alone is flagged unconditionally.
        if enhanced and (
            not in_past_large or drawdown >= SIM_TIER_CONVICTION_DD_PCT
        ):
            trade["conviction_buy"] = True
        return float(GT_BUY_PCT_LARGE_LEG)

    # Medium tier.
    if drawdown >= medium_threshold:
        if not enhanced:
            # Without the enhanced tiers, medium is half of the large scale.
            return float(GT_BUY_PCT_LARGE_LEG) * 0.5
        if drawdown >= SIM_TIER_CONVICTION_DD_PCT:
            trade["conviction_buy"] = True
        return float(GT_BUY_PCT_MEDIUM_LEG)

    return float(GT_BUY_PCT_SMALL_LEG)
def _monitor_rows_from_fires(fires: pd.DataFrame) -> list[dict[str, Any]]:
    """
    Convert monitor fires into trade dicts in chronological order.

    Args:
        fires: Fire rows; the "dt", "side" and "close" columns are read and
            "rule_id" is optional.

    Returns:
        One {"dt", "action", "price", "rule_id"} dict per fire, ordered by
        sort_fires_chronological.
    """
    ordered = sort_fires_chronological(fires)
    return [
        {
            "dt": str(fire["dt"]),
            "action": fire["side"],
            "price": float(fire["close"]),
            "rule_id": fire.get("rule_id", ""),
        }
        for _, fire in ordered.iterrows()
    ]
def build_monitor_hybrid_sized_trades(
    fires: pd.DataFrame,
    df: pd.DataFrame,
    *,
    enhanced: bool = False,
    initial_cash: float = GT_INITIAL_CASH_KRW,
    fee_rate: float = TRADING_FEE_RATE,
    dd_large_pct: float | None = None,
    dd_medium_pct: float | None = None,
) -> tuple[list[dict[str, Any]], dict[str, Any]]:
    """
    Turn monitor fires into trades whose order amounts follow the hybrid tier.

    Fires are enriched with GT weights (causal legs), buys receive their bar
    drawdown, and amounts are then allocated chronologically with
    hybrid_tier_scale as the per-buy scale function.

    Args:
        fires: Monitor rule fires (buy and sell).
        df: OHLC frame used for the drawdown lookup.
        enhanced: Use the conviction flag and the dedicated medium tier.
        initial_cash: Starting cash.
        fee_rate: Trading fee rate.
        dd_large_pct: Large-tier drawdown threshold (%); None defers to config.
        dd_medium_pct: Medium-tier drawdown threshold (%); None defers to config.

    Returns:
        Whatever allocate_order_amounts_chronological returns (sized trades and
        allocation stats). For empty fires:
        ([], {"buy_executed": 0, "buy_skipped": 0}).
    """
    from deepcoin.ground_truth.ground_truth import load_ground_truth, order_trades_chronological
    from deepcoin.paths import resolve_ground_truth_file

    if fires.empty:
        return [], {"buy_executed": 0, "buy_skipped": 0}

    ground_truth = load_ground_truth(resolve_ground_truth_file()) or {}
    reference_trades = order_trades_chronological(ground_truth.get("trades") or [])
    candidates = enrich_sim_trades_with_gt_weights(
        _monitor_rows_from_fires(fires),
        reference_trades,
        causal_legs=True,
    )
    candidates = _attach_drawdown_to_buys(candidates, df)

    def _tier_scale(buy: dict[str, Any], completed_leg_ret: dict[int, float]) -> float:
        # Binds the tier settings of this call to the allocator's callback shape.
        return hybrid_tier_scale(
            buy,
            completed_leg_ret=completed_leg_ret,
            enhanced=enhanced,
            dd_large_pct=dd_large_pct,
            dd_medium_pct=dd_medium_pct,
        )

    return allocate_order_amounts_chronological(
        candidates,
        initial_cash=initial_cash,
        fee_rate=fee_rate,
        causal_tier=False,
        asset_pct_scale_fn=_tier_scale,
    )
def _simulate_monitor_tier_portfolio(
    fires: pd.DataFrame,
    df: pd.DataFrame,
    *,
    enhanced: bool = False,
    last_price: float | None = None,
    initial_cash: float = GT_INITIAL_CASH_KRW,
    fee_rate: float = TRADING_FEE_RATE,
    dd_large_pct: float | None = None,
    dd_medium_pct: float | None = None,
) -> dict[str, Any]:
    """
    Simulate the monitor buy+sell portfolio with tier sizing (hybrid/enhanced).

    Args:
        fires: Monitor rule fires (buy and sell).
        df: OHLC frame used for the drawdown lookup.
        enhanced: Use the conviction flag and the dedicated medium tier.
        last_price: Mark price for open positions; when None, the last close of
            df is used if it can be read.
        initial_cash: Starting cash.
        fee_rate: Trading fee rate.
        dd_large_pct: Large-tier drawdown threshold (%).
        dd_medium_pct: Medium-tier drawdown threshold (%).

    Returns:
        The simulate_portfolio_summary dict extended with "sizing_mode",
        "sizing_note", "alloc_stats" and "input_fires". For empty fires a
        minimal dict with zero PnL and zero trades.
    """
    mode = "monitor_tier_enhanced" if enhanced else "monitor_dd_tier"
    if fires.empty:
        return {"pnl_pct": 0.0, "trade_count": 0, "sizing_mode": mode}

    sized, alloc_stats = build_monitor_hybrid_sized_trades(
        fires,
        df,
        enhanced=enhanced,
        initial_cash=initial_cash,
        fee_rate=fee_rate,
        dd_large_pct=dd_large_pct,
        dd_medium_pct=dd_medium_pct,
    )

    mark = last_price
    if mark is None and not df.empty:
        try:
            mark = float(_close_series_from_df(df).iloc[-1])
        except KeyError:
            # No close column: leave the mark price unset.
            mark = None

    summary = simulate_portfolio_summary(
        sized,
        initial_cash=initial_cash,
        fee_rate=fee_rate,
        last_price=mark,
        use_amount_krw=True,
    )
    note = (
        "monitor buy+sell + past-leg·drawdown tier + conviction (미래 미사용)"
        if enhanced
        else "monitor buy+sell + drawdown·past-leg tier (미래 미사용)"
    )
    summary.update(
        {
            "sizing_mode": mode,
            "sizing_note": note,
            "alloc_stats": alloc_stats,
            "input_fires": int(len(fires)),
        }
    )
    return summary
def _attach_drawdown_to_buys(
    trades: list[dict[str, Any]],
    df: pd.DataFrame,
) -> list[dict[str, Any]]:
    """
    Add "drawdown_pct" (rounded to 2 decimals) to every buy trade.

    Args:
        trades: Enriched trade dicts.
        df: OHLC frame with a DatetimeIndex.

    Returns:
        A new list of shallow-copied trade dicts where buys carry
        "drawdown_pct". When df is empty the input list is returned unchanged.
    """
    if df.empty:
        return trades
    closes = _close_series_from_df(df)
    annotated: list[dict[str, Any]] = []
    for trade in trades:
        item = dict(trade)
        if item.get("action") == "buy":
            position = _bar_index_at(df, str(item.get("dt", "")))
            item["drawdown_pct"] = round(_drawdown_pct_at_index(closes, position), 2)
        annotated.append(item)
    return annotated
def simulate_monitor_dd_tier_portfolio(
    fires: pd.DataFrame,
    df: pd.DataFrame,
    *,
    last_price: float | None = None,
    initial_cash: float = GT_INITIAL_CASH_KRW,
    fee_rate: float = TRADING_FEE_RATE,
    dd_large_pct: float | None = None,
    dd_medium_pct: float | None = None,
) -> dict[str, Any]:
    """
    Simulate monitor buy+sell with drawdown/past-leg tier sizing (not enhanced).

    Thin wrapper around _simulate_monitor_tier_portfolio with enhanced=False.

    Args:
        fires: Monitor rule fires (buy and sell).
        df: OHLC frame used for the drawdown lookup.
        last_price: Mark price for open positions.
        initial_cash: Starting cash.
        fee_rate: Trading fee rate.
        dd_large_pct: Large-tier drawdown threshold (%).
        dd_medium_pct: Medium-tier drawdown threshold (%).

    Returns:
        Portfolio summary dict.
    """
    options: dict[str, Any] = dict(
        enhanced=False,
        last_price=last_price,
        initial_cash=initial_cash,
        fee_rate=fee_rate,
        dd_large_pct=dd_large_pct,
        dd_medium_pct=dd_medium_pct,
    )
    return _simulate_monitor_tier_portfolio(fires, df, **options)
def simulate_monitor_tier_enhanced_portfolio(
    fires: pd.DataFrame,
    df: pd.DataFrame,
    *,
    last_price: float | None = None,
    initial_cash: float = GT_INITIAL_CASH_KRW,
    fee_rate: float = TRADING_FEE_RATE,
    dd_large_pct: float | None = None,
    dd_medium_pct: float | None = None,
) -> dict[str, Any]:
    """
    Simulate monitor buy+sell with past-leg/drawdown tiers plus conviction.

    Thin wrapper around _simulate_monitor_tier_portfolio with enhanced=True.
    The drawdown thresholds are forwarded so this entry point has the same
    signature as simulate_monitor_dd_tier_portfolio; leaving them as None
    keeps the config defaults, as before.

    Args:
        fires: Monitor rule fires (buy and sell).
        df: OHLC frame used for the drawdown lookup.
        last_price: Mark price for open positions.
        initial_cash: Starting cash.
        fee_rate: Trading fee rate.
        dd_large_pct: Large-tier drawdown threshold (%); None defers to config.
        dd_medium_pct: Medium-tier drawdown threshold (%); None defers to config.

    Returns:
        Portfolio summary dict.
    """
    return _simulate_monitor_tier_portfolio(
        fires,
        df,
        enhanced=True,
        last_price=last_price,
        initial_cash=initial_cash,
        fee_rate=fee_rate,
        dd_large_pct=dd_large_pct,
        dd_medium_pct=dd_medium_pct,
    )
def simulate_causal_gt_hybrid_portfolio(
    buy_fires: pd.DataFrame,
    df: pd.DataFrame,
    *,
    monitor_fires: pd.DataFrame | None = None,
    last_price: float | None = None,
    cg_params: dict[str, Any] | None = None,
    initial_cash: float = GT_INITIAL_CASH_KRW,
    fee_rate: float = TRADING_FEE_RATE,
    dd_large_pct: float | None = None,
    dd_medium_pct: float | None = None,
) -> dict[str, Any]:
    """
    Hybrid entry point: monitor buy+sell with drawdown tier sizing.

    With non-empty monitor_fires this delegates to
    simulate_monitor_dd_tier_portfolio. Otherwise a zero-PnL placeholder with
    the note "monitor_fires required" is returned; buy_fires and cg_params are
    accepted but not used by this function.

    Args:
        buy_fires: Buy fires (unused here).
        df: OHLC frame.
        monitor_fires: Monitor buy+sell fires.
        last_price: Mark price for open positions.
        cg_params: Legacy parameters (unused here).
        initial_cash: Starting cash.
        fee_rate: Trading fee rate.
        dd_large_pct: Large-tier drawdown threshold (%).
        dd_medium_pct: Medium-tier drawdown threshold (%).

    Returns:
        Portfolio summary dict.
    """
    if monitor_fires is None or monitor_fires.empty:
        return {
            "pnl_pct": 0.0,
            "trade_count": 0,
            "note": "monitor_fires required",
            "sizing_mode": "causal_gt_hybrid",
        }
    return simulate_monitor_dd_tier_portfolio(
        monitor_fires,
        df,
        last_price=last_price,
        initial_cash=initial_cash,
        fee_rate=fee_rate,
        dd_large_pct=dd_large_pct,
        dd_medium_pct=dd_medium_pct,
    )

View File

@@ -0,0 +1,433 @@
"""
인과적 GT leg 타점 생성 — t 시점까지 데이터만 사용.
GT split_buy_peak_sell 과 동일 구조(분할매수·65/35 매도·leg_id)이나
피벗·leg 종료는 gt_signal_causal 확정 신호만 사용합니다.
"""
from __future__ import annotations
from typing import Any, Literal
import pandas as pd
from config import (
GT_BUY_MIN_BARS,
GT_BUY_MIN_SWING_PCT,
GT_MAX_BUYS_PER_LEG,
GT_MAX_SELLS_PER_LEG,
GT_MIN_SWING_PCT,
GT_PIVOT_ORDER,
GT_SELL_SPLIT_GAP_PCT,
)
from deepcoin.ground_truth.gt_model import leg_entry_weights, leg_exit_weights
from deepcoin.ground_truth.gt_signal_causal import enrich_scan_frame_gt_signals_causal
PeakMode = Literal["zigzag", "local"]
def _collect_causal_buy_bars(
    frame: pd.DataFrame,
    start: pd.Timestamp,
    end: pd.Timestamp,
    *,
    min_bars: int,
    max_buys: int,
    use_local_trough: bool,
    bb_max: float,
) -> list[tuple[pd.Timestamp, float]]:
    """
    Collect buy candidate bars inside the leg window (start, end].

    Candidates closer than min_bars to the previously kept one are merged
    (the cheaper of the two survives). If more than max_buys remain, only the
    max_buys cheapest are kept.

    Args:
        frame: Frame carrying the signal columns ("gt_trough_local" and
            "bb_pos", or "gt_buy_signal") and a "Low" (or "close") price.
        start: Window start, exclusive.
        end: Window end, inclusive.
        min_bars: Minimum bar distance between two kept buys.
        max_buys: Maximum number of buys returned.
        use_local_trough: True selects gt_trough_local == 1 with
            bb_pos <= bb_max; False selects gt_buy_signal == 1.
        bb_max: Upper bound for bb_pos (local-trough mode only).

    Returns:
        (timestamp, price) tuples in chronological order.
    """
    window = frame[(frame.index > start) & (frame.index <= end)]
    if window.empty:
        return []

    if use_local_trough:
        bb_pos = pd.to_numeric(window.get("bb_pos"), errors="coerce")
        selector = (window["gt_trough_local"] == 1) & (bb_pos <= bb_max)
    else:
        selector = window["gt_buy_signal"] == 1

    picked: list[tuple[pd.Timestamp, float, int]] = []
    for ts, bar in window[selector].iterrows():
        low = float(bar["Low"]) if "Low" in bar else float(bar.get("close", 0))
        if low <= 0:
            continue
        loc = frame.index.get_loc(ts)
        # A slice comes back for repeated index values; use its first position.
        position = int(loc.start or 0) if isinstance(loc, slice) else int(loc)
        picked.append((ts, low, position))
    picked.sort(key=lambda cand: cand[0])

    kept: list[tuple[pd.Timestamp, float, int]] = []
    for cand in picked:
        too_close = bool(kept) and cand[2] - kept[-1][2] < min_bars
        if not too_close:
            kept.append(cand)
        elif cand[1] < kept[-1][1]:
            kept[-1] = cand

    if len(kept) > max_buys:
        cheapest = sorted(kept, key=lambda cand: cand[1])[:max_buys]
        kept = sorted(cheapest, key=lambda cand: cand[0])
    return [(ts, low) for ts, low, _ in kept]
def _causal_sell_points(
    frame: pd.DataFrame,
    peak_ts: pd.Timestamp,
    max_splits: int,
    *,
    peak_signal_col: str = "gt_peak_zigzag",
) -> list[tuple[pd.Timestamp, float, float]]:
    """
    Build the sell points of a leg: the peak bar plus an optional second split.

    The second split is the first bar among the 80 bars after the peak whose
    peak_signal_col equals 1 and whose price lies within GT_SELL_SPLIT_GAP_PCT
    (%) of the peak price.

    Args:
        frame: OHLC frame with the peak signal column.
        peak_ts: Timestamp of the leg-ending peak.
        max_splits: Maximum number of sell splits; values below 2 disable the
            second split.
        peak_signal_col: Column scanned for the second split.

    Returns:
        (timestamp, price, weight) tuples; empty when peak_ts is not in the
        frame index; a single full-weight entry when no second split applies.
    """
    if peak_ts not in frame.index:
        return []

    def _bar_price(bar: pd.Series) -> float:
        # Sells are priced at the bar's High, falling back to "close".
        return float(bar["High"]) if "High" in bar else float(bar.get("close", 0))

    peak_bar = frame.loc[peak_ts]
    if isinstance(peak_bar, pd.DataFrame):
        # Repeated index value: use the last matching row.
        peak_bar = peak_bar.iloc[-1]
    main_price = _bar_price(peak_bar)

    weights = leg_exit_weights(max_splits if max_splits >= 2 else 1)
    if max_splits < 2 or len(weights) < 2:
        return [(peak_ts, main_price, 1.0)]

    loc = frame.index.get_loc(peak_ts)
    peak_pos = int(loc.start or 0) if isinstance(loc, slice) else loc
    lookahead = frame.iloc[peak_pos + 1 : peak_pos + 81]
    for ts, bar in lookahead.iterrows():
        if int(bar.get(peak_signal_col, 0)) != 1:
            continue
        price = _bar_price(bar)
        gap_pct = abs(price - main_price) / max(main_price, 1e-9) * 100.0
        if gap_pct <= GT_SELL_SPLIT_GAP_PCT:
            return [
                (peak_ts, main_price, weights[0]),
                (ts, price, weights[1]),
            ]
    return [(peak_ts, main_price, 1.0)]
def _peak_signal_column(peak_mode: PeakMode) -> str:
    """
    Map the peak mode to the frame column that marks leg-ending peaks.

    Args:
        peak_mode: "local" or "zigzag".

    Returns:
        "gt_peak_local" for "local"; "gt_peak_zigzag" for anything else.
    """
    if peak_mode == "local":
        return "gt_peak_local"
    return "gt_peak_zigzag"
def _filter_peak_times(
    frame: pd.DataFrame,
    peak_col: str,
    min_bars: int,
) -> list[pd.Timestamp]:
    """
    Thin out peak candidates so kept peaks are at least min_bars bars apart.

    Candidates are visited in index order; a candidate is kept when its bar
    position is at least min_bars past the previously kept one.

    Args:
        frame: OHLC frame.
        peak_col: Column whose value 1 marks a peak candidate.
        min_bars: Minimum bar distance between kept peaks.

    Returns:
        Timestamps of the kept peaks.
    """
    candidates = frame.index[frame[peak_col] == 1]
    selected: list[pd.Timestamp] = []
    if len(candidates) == 0:
        return selected
    # Start one full gap before bar 0 so a peak on the first bar is accepted.
    previous = -min_bars
    for ts in candidates:
        loc = frame.index.get_loc(ts)
        position = int(loc.start or 0) if isinstance(loc, slice) else loc
        if position - previous < min_bars:
            continue
        selected.append(ts)
        previous = int(position)
    return selected
def _precompute_buy_candidates(
    frame: pd.DataFrame,
    *,
    use_local_trough: bool,
    bb_max: float,
) -> list[tuple[int, pd.Timestamp, float]]:
    """
    List every buy candidate of the whole frame as (bar_idx, ts, price).

    Args:
        frame: Enriched frame with the signal columns and a "Low" (or "close")
            price.
        use_local_trough: True selects gt_trough_local == 1 with
            bb_pos <= bb_max; False selects gt_buy_signal == 1.
        bb_max: Upper bound for bb_pos (local-trough mode only).

    Returns:
        (bar position, timestamp, price) tuples in index order; bars whose
        price is not positive are skipped.
    """
    if use_local_trough:
        bb_pos = pd.to_numeric(frame.get("bb_pos"), errors="coerce")
        selector = (frame["gt_trough_local"] == 1) & (bb_pos <= bb_max)
    else:
        selector = frame["gt_buy_signal"] == 1

    found: list[tuple[int, pd.Timestamp, float]] = []
    for ts in frame.index[selector]:
        bar = frame.loc[ts]
        if isinstance(bar, pd.DataFrame):
            # Repeated index value: use the last matching row.
            bar = bar.iloc[-1]
        price = float(bar["Low"]) if "Low" in bar else float(bar.get("close", 0))
        if price <= 0:
            continue
        loc = frame.index.get_loc(ts)
        position = int(loc.start or 0) if isinstance(loc, slice) else int(loc)
        found.append((position, ts, price))
    return found
def _buys_in_range(
    candidates: list[tuple[int, pd.Timestamp, float]],
    start_idx: int,
    end_idx: int,
    *,
    min_bars: int,
    max_buys: int,
) -> list[tuple[pd.Timestamp, float]]:
    """
    Select the buys of one leg from precomputed candidates.

    Only candidates with start_idx < bar_idx <= end_idx are considered.
    A candidate closer than min_bars to the previously kept one is merged with
    it (the cheaper survives). If more than max_buys remain, only the max_buys
    cheapest are kept.

    Args:
        candidates: (bar_idx, ts, price) tuples, in bar order.
        start_idx: Lower bar bound, exclusive.
        end_idx: Upper bar bound, inclusive.
        min_bars: Minimum bar distance between two kept buys.
        max_buys: Maximum number of buys returned.

    Returns:
        (timestamp, price) tuples ordered by bar position.
    """
    kept: list[tuple[int, pd.Timestamp, float]] = []
    for cand in candidates:
        position, _, price = cand
        if not start_idx < position <= end_idx:
            continue
        if kept and position - kept[-1][0] < min_bars:
            # Too close to the previous buy: keep whichever is cheaper.
            if price < kept[-1][2]:
                kept[-1] = cand
            continue
        kept.append(cand)

    if len(kept) > max_buys:
        cheapest = sorted(kept, key=lambda cand: cand[2])[:max_buys]
        kept = sorted(cheapest, key=lambda cand: cand[0])
    return [(ts, price) for _, ts, price in kept]
def build_causal_split_buy_peak_sell_trades(
    df: pd.DataFrame,
    *,
    pivot_order: int = GT_PIVOT_ORDER,
    buy_swing_pct: float = GT_BUY_MIN_SWING_PCT,
    sell_swing_pct: float = GT_MIN_SWING_PCT,
    bb_max: float = 0.65,
    min_leg_pct: float = GT_MIN_SWING_PCT,
    buy_min_bars: int = GT_BUY_MIN_BARS,
    max_buys: int = GT_MAX_BUYS_PER_LEG,
    max_sells: int = GT_MAX_SELLS_PER_LEG,
    use_local_trough: bool = True,
    peak_mode: PeakMode = "local",
    min_bars_between_legs: int = 60,
) -> list[dict[str, Any]]:
    """
    Build split-buy / peak-sell trades from the causally enriched signal frame.

    Each accepted peak closes a leg: the buys found between the previous leg
    end and the peak are emitted first, followed by the sell point(s) at the
    peak. Trades are appended leg by leg.

    Args:
        df: OHLCV frame with bb_pos (DatetimeIndex).
        pivot_order: Pivot confirmation order passed to the enrichment step.
        buy_swing_pct: Buy-side ZigZag swing (%).
        sell_swing_pct: Sell-side ZigZag swing (%).
        bb_max: Upper bound of bb_pos for buys.
        min_leg_pct: Minimum rise (%) from the leg's lowest Low to the peak.
        buy_min_bars: Minimum bar distance between split buys.
        max_buys: Maximum buys per leg.
        max_sells: Maximum sells per leg.
        use_local_trough: Use local-trough buys instead of gt_buy_signal.
        peak_mode: "zigzag" or "local" (which peak column ends a leg).
        min_bars_between_legs: Minimum bar distance between consecutive leg ends.

    Returns:
        List of {"dt", "action", "price", "weight", "leg_id"} dicts.
    """
    frame = enrich_scan_frame_gt_signals_causal(
        df,
        pivot_order=pivot_order,
        buy_swing_pct=buy_swing_pct,
        sell_swing_pct=sell_swing_pct,
        bb_max=bb_max,
    )
    peak_col = _peak_signal_column(peak_mode)
    if peak_col not in frame.columns:
        return []
    peak_times = _filter_peak_times(frame, peak_col, min_bars_between_legs)
    if not peak_times:
        return []
    buy_candidates = _precompute_buy_candidates(
        frame,
        use_local_trough=use_local_trough,
        bb_max=bb_max,
    )

    def _record(
        ts: pd.Timestamp, action: str, price: float, weight: float, leg: int
    ) -> dict[str, Any]:
        # One output row; prices are rounded to 2 and weights to 4 decimals.
        return {
            "dt": ts.strftime("%Y-%m-%d %H:%M:%S"),
            "action": action,
            "price": round(price, 2),
            "weight": round(weight, 4),
            "leg_id": leg,
        }

    # Position of the first bar; legs start strictly after the previous leg end.
    first_pos = 0
    if frame.index.size:
        first_loc = frame.index.get_loc(frame.index[0])
        first_pos = (
            int(first_loc.start or 0) if isinstance(first_loc, slice) else int(first_loc)
        )

    trades: list[dict[str, Any]] = []
    last_exit_pos = first_pos
    leg_no = 0
    trough_price = 0.0
    for peak_ts in peak_times:
        peak_pos = frame.index.get_loc(peak_ts)
        if isinstance(peak_pos, slice):
            peak_pos = int(peak_pos.start or 0)
        if peak_pos - last_exit_pos < min_bars_between_legs:
            continue

        peak_bar = frame.loc[peak_ts]
        if isinstance(peak_bar, pd.DataFrame):
            peak_bar = peak_bar.iloc[-1]
        peak_price = (
            float(peak_bar["High"]) if "High" in peak_bar else float(peak_bar.get("close", 0))
        )

        # Lowest Low since the previous leg end; the previous value is kept
        # when the span is empty or has no "Low" column.
        span = frame.iloc[last_exit_pos + 1 : peak_pos + 1]
        if not span.empty and "Low" in span.columns:
            trough_price = float(span["Low"].astype(float).min())
        if trough_price > 0:
            rise_pct = (peak_price - trough_price) / max(trough_price, 1e-9) * 100.0
        else:
            rise_pct = 0.0
        if rise_pct < min_leg_pct:
            # Leg too small: skip this peak without moving the leg start.
            continue

        entries = _buys_in_range(
            buy_candidates,
            last_exit_pos,
            int(peak_pos),
            min_bars=buy_min_bars,
            max_buys=max_buys,
        )
        if entries:
            entry_weights = leg_entry_weights([price for _, price in entries])
            for (ts, price), weight in zip(entries, entry_weights):
                trades.append(_record(ts, "buy", price, weight, leg_no))
            exits = _causal_sell_points(
                frame,
                peak_ts,
                max_sells,
                peak_signal_col=peak_col,
            )
            for ts, price, weight in exits[:max_sells]:
                trades.append(_record(ts, "sell", price, weight, leg_no))
            leg_no += 1

        # With or without buys, the next leg starts after this peak.
        last_exit_pos = int(peak_pos)
        trough_price = peak_price
    return trades
def simulate_causal_gt_portfolio(
    df: pd.DataFrame,
    *,
    last_price: float | None = None,
    **build_kw: Any,
) -> dict[str, Any]:
    """
    Simulate the compounding portfolio of the causal GT leg engine.

    Trades come from build_causal_split_buy_peak_sell_trades and are sized by
    allocate_order_amounts_chronological with causal_tier=True.

    Args:
        df: OHLCV frame.
        last_price: Mark price for open positions. When None, the last value of
            the "close" column (or, if absent, "Close") is used when available.
        **build_kw: Keyword arguments forwarded to
            build_causal_split_buy_peak_sell_trades.

    Returns:
        The simulate_portfolio_summary dict extended with "leg_count",
        "sizing_mode", "sizing_note", "causal_gt_params" and "alloc_stats".
        When no trades are produced, a minimal zero-PnL dict with note
        "no trades".
    """
    from deepcoin.ground_truth.gt_allocation import (
        allocate_order_amounts_chronological,
        simulate_portfolio_summary,
    )

    raw = build_causal_split_buy_peak_sell_trades(df, **build_kw)
    if not raw:
        return {
            "pnl_pct": 0.0,
            "trade_count": 0,
            "leg_count": 0,
            "note": "no trades",
            "sizing_mode": "causal_gt_leg_engine",
        }
    sized, alloc_stats = allocate_order_amounts_chronological(raw, causal_tier=True)

    mark = last_price
    if mark is None and not df.empty:
        # Accept both spellings: this module reads capitalized "High"/"Low"
        # columns elsewhere, so the close column may be "Close" as well.
        close_col = next((c for c in ("close", "Close") if c in df.columns), None)
        if close_col is not None:
            mark = float(df[close_col].iloc[-1])

    result = simulate_portfolio_summary(
        sized,
        last_price=mark,
        use_amount_krw=True,
    )
    result["leg_count"] = len({t.get("leg_id") for t in raw})
    result["sizing_mode"] = "causal_gt_leg_engine"
    result["sizing_note"] = (
        "인과 GT leg: split_buy + peak_sell, causal tier 복리 (미래 미사용)"
    )
    result["causal_gt_params"] = dict(build_kw)
    result["alloc_stats"] = alloc_stats
    return result

View File

@@ -75,7 +75,7 @@ def allocate_order_amounts_chronological(
Returns:
(amount_krw 채워진 trades, alloc_stats).
"""
from config import GT_LARGE_LEG_TOP_PCT
from config import GT_BUY_PCT_LARGE_LEG, GT_LARGE_LEG_TOP_PCT
from deepcoin.matching.position_sizing import (
compute_buy_amount_krw,
@@ -109,6 +109,7 @@ def allocate_order_amounts_chronological(
sell_executed = 0
sell_skipped = 0
buy_amounts: list[float] = []
large_tier_buys = 0
completed_leg_ret: dict[int, float] = {}
leg_cost_krw: dict[int, float] = {}
leg_proceeds_krw: dict[int, float] = {}
@@ -128,7 +129,7 @@ def allocate_order_amounts_chronological(
)
scale = leg_asset_pct_scale(leg_id, large_now)
elif asset_pct_scale_fn is not None:
scale = asset_pct_scale_fn(t)
scale = asset_pct_scale_fn(t, completed_leg_ret)
else:
scale = leg_asset_pct_scale(leg_id, large_legs)
amount = compute_buy_amount_krw(
@@ -140,6 +141,7 @@ def allocate_order_amounts_chronological(
asset_pct_scale=scale,
min_order_krw=min_order_krw,
fee_rate=fee_rate,
ignore_weight_split=bool(t.get("conviction_buy")),
)
if amount <= 0:
t["amount_krw"] = 0
@@ -154,6 +156,8 @@ def allocate_order_amounts_chronological(
leg_cost_krw[leg_id] = leg_cost_krw.get(leg_id, 0.0) + amount + fee
buy_executed += 1
buy_amounts.append(amount)
if scale >= float(GT_BUY_PCT_LARGE_LEG) * 0.99:
large_tier_buys += 1
sell_leg = None
elif t["action"] == "sell":
@@ -188,7 +192,7 @@ def allocate_order_amounts_chronological(
if qty < 1e-12:
qty = 0.0
sell_executed += 1
if causal_tier and leg_qty <= 1e-12:
if (causal_tier or asset_pct_scale_fn is not None) and leg_qty <= 1e-12:
cost = leg_cost_krw.pop(leg_id, 0.0)
proceeds = leg_proceeds_krw.pop(leg_id, 0.0)
if cost > 0:
@@ -200,7 +204,8 @@ def allocate_order_amounts_chronological(
"sell_executed": sell_executed,
"sell_skipped": sell_skipped,
"buy_total_krw": round(sum(buy_amounts), 0),
"large_leg_count": len(large_legs),
"large_leg_count": large_tier_buys,
"large_tier_buy_count": large_tier_buys,
}
if buy_amounts:
stats["buy_amount_avg_krw"] = round(sum(buy_amounts) / len(buy_amounts), 0)

View File

@@ -74,6 +74,7 @@ def _zigzag_filter_causal(
prices: np.ndarray,
min_swing_pct: float,
kind: str,
pivot_order: int = GT_PIVOT_ORDER,
) -> np.ndarray:
"""
확정 피벗에 ZigZag 최소 스윙% 필터 (인과적, 순차 갱신).
@@ -90,7 +91,7 @@ def _zigzag_filter_causal(
"""
n = len(confirm)
out = np.zeros(n, dtype=np.int8)
order = GT_PIVOT_ORDER
order = int(pivot_order)
last_kind: str | None = None
last_price = 0.0
min_ratio = min_swing_pct / 100.0
@@ -158,10 +159,10 @@ def enrich_scan_frame_gt_signals_causal(
peak_conf = _confirmed_peak_mask(high, pivot_order)
trough_z = _zigzag_filter_causal(
trough_conf, low, buy_swing_pct, "trough"
trough_conf, low, buy_swing_pct, "trough", pivot_order=pivot_order
)
peak_z = _zigzag_filter_causal(
peak_conf, high, sell_swing_pct, "peak"
peak_conf, high, sell_swing_pct, "peak", pivot_order=pivot_order
)
out["gt_trough_local"] = trough_conf

View File

@@ -0,0 +1,200 @@
"""
Hybrid DD tier 임계값 train 그리드 → holdout 검증 (Option C 2차).
"""
from __future__ import annotations
import json
from itertools import product
from pathlib import Path
from typing import Any
import pandas as pd
from config import GT_INITIAL_CASH_KRW, MATCH_HOLDOUT_RATIO, TRADING_FEE_RATE
from deepcoin.ground_truth.causal_gt_hybrid import build_monitor_hybrid_sized_trades
from deepcoin.ground_truth.gt_allocation import simulate_portfolio_steps
from deepcoin.matching.option_c_phase2 import walk_forward_portfolio_by_month
from deepcoin.matching.portfolio_sim import sort_fires_chronological
from deepcoin.matching.simulation import portfolio_holdout_from_steps
from deepcoin.paths import MATCHING_HYBRID_DD_CALIBRATION_JSON
def default_dd_grid() -> dict[str, list[float]]:
    """
    Return the default search grid for the drawdown tier thresholds.

    Returns:
        Candidate values (%) for "dd_large_pct" and "dd_medium_pct".
    """
    large_candidates = [5.0, 6.0, 8.0, 10.0, 12.0]
    medium_candidates = [2.0, 3.0, 4.0, 6.0]
    return {"dd_large_pct": large_candidates, "dd_medium_pct": medium_candidates}
def load_hybrid_dd_params(path: Path | None = None) -> dict[str, float]:
    """
    Load the calibrated drawdown thresholds, falling back to config defaults.

    Args:
        path: Calibration JSON path; defaults to
            MATCHING_HYBRID_DD_CALIBRATION_JSON.

    Returns:
        {"dd_large_pct", "dd_medium_pct"}. The stored values are used only when
        the file exists and its "best_params" has a non-null "dd_large_pct";
        a missing "dd_medium_pct" then falls back to the config value.
    """
    from config import CAUSAL_GT_DD_LARGE_PCT, CAUSAL_GT_DD_MEDIUM_PCT

    source = path or MATCHING_HYBRID_DD_CALIBRATION_JSON
    stored: dict[str, Any] = {}
    if source.is_file():
        payload = json.loads(source.read_text(encoding="utf-8"))
        stored = payload.get("best_params") or {}

    if stored.get("dd_large_pct") is None:
        return {
            "dd_large_pct": float(CAUSAL_GT_DD_LARGE_PCT),
            "dd_medium_pct": float(CAUSAL_GT_DD_MEDIUM_PCT),
        }
    medium = stored.get("dd_medium_pct", CAUSAL_GT_DD_MEDIUM_PCT)
    return {
        "dd_large_pct": float(stored["dd_large_pct"]),
        "dd_medium_pct": float(medium),
    }
def calibrate_hybrid_dd_thresholds(
    fires: pd.DataFrame,
    ohlc_df: pd.DataFrame,
    *,
    holdout_start: pd.Timestamp,
    grid: dict[str, list[float]] | None = None,
    last_price: float | None = None,
) -> dict[str, Any]:
    """
    Grid-search the drawdown tier thresholds and rank them by train-period PnL.

    For each (dd_large, dd_medium) pair with dd_medium < dd_large the monitor
    fires are sized with the hybrid tier and simulated once over the whole
    period. Train PnL is measured from the initial cash to the last portfolio
    step before holdout_start; holdout, full-period and monthly walk-forward
    metrics are recorded next to it.

    Args:
        fires: All monitor fires.
        ohlc_df: OHLC frame.
        holdout_start: First timestamp of the holdout period.
        grid: Candidate lists under "dd_large_pct" and "dd_medium_pct";
            defaults to default_dd_grid().
        last_price: Mark price for open positions in the full-period summary.

    Returns:
        Dict with "best_params", "best_metrics", "grid_size", "top5" and
        "holdout_start"; or {"best_params": <stored/config params>,
        "note": "empty grid"} when no pair qualifies.
    """
    from deepcoin.ground_truth.gt_allocation import simulate_portfolio_summary

    grid = grid or default_dd_grid()
    chron = sort_fires_chronological(fires)
    results: list[dict[str, Any]] = []
    for dd_large, dd_medium in product(grid["dd_large_pct"], grid["dd_medium_pct"]):
        if dd_medium >= dd_large:
            # The medium threshold must stay strictly below the large one.
            continue
        sized, stats = build_monitor_hybrid_sized_trades(
            chron,
            ohlc_df,
            enhanced=False,
            dd_large_pct=dd_large,
            dd_medium_pct=dd_medium,
        )
        steps = simulate_portfolio_steps(sized, use_amount_krw=True)

        # Train PnL: initial cash -> last total asset seen before the holdout.
        train_assets = [
            float(step["total_asset_krw"])
            for step in steps
            if pd.to_datetime(step["dt"]) < holdout_start
        ]
        train_asset_end = train_assets[-1] if train_assets else GT_INITIAL_CASH_KRW
        train_pnl = (train_asset_end - GT_INITIAL_CASH_KRW) / GT_INITIAL_CASH_KRW * 100

        holdout = portfolio_holdout_from_steps(
            steps,
            holdout_start,
            note="holdout",
        )
        full = simulate_portfolio_summary(
            sized,
            last_price=last_price,
            use_amount_krw=True,
        )
        wf = walk_forward_portfolio_by_month(steps)
        pos_months = sum(1 for w in wf if float(w.get("pnl_pct") or 0) > 0)
        results.append(
            {
                "dd_large_pct": dd_large,
                "dd_medium_pct": dd_medium,
                "train_pnl_pct": round(train_pnl, 2),
                # "or 0" also covers keys that are present but None.
                "holdout_pnl_pct": float(holdout.get("pnl_pct") or 0),
                "full_pnl_pct": float(full.get("pnl_pct") or 0),
                "max_drawdown_pct": float(full.get("max_drawdown_pct") or 0),
                "wf_positive_months": pos_months,
                "wf_months": len(wf),
                "large_tier_buys": stats.get("large_tier_buy_count", 0),
            }
        )
    if not results:
        return {"best_params": load_hybrid_dd_params(), "note": "empty grid"}

    # Train PnL ranks first; holdout PnL only breaks ties.
    # NOTE(review): the tie-break lets holdout data influence selection —
    # confirm this is intended for a train/holdout validation.
    ranked = sorted(
        results,
        key=lambda x: (x["train_pnl_pct"], x["holdout_pnl_pct"]),
        reverse=True,
    )
    best = ranked[0]
    return {
        "best_params": {
            "dd_large_pct": best["dd_large_pct"],
            "dd_medium_pct": best["dd_medium_pct"],
        },
        "best_metrics": best,
        "grid_size": len(results),
        "top5": ranked[:5],
        "holdout_start": str(holdout_start),
    }
def run_and_save_calibration(
    fires: pd.DataFrame,
    ohlc_df: pd.DataFrame,
    *,
    outcomes: pd.DataFrame,
    last_price: float | None = None,
    out_path: Path | None = None,
) -> dict[str, Any]:
    """
    Run the drawdown-threshold calibration and write the result as JSON.

    The holdout start is the (1 - MATCH_HOLDOUT_RATIO) quantile of the
    timestamps in outcomes["dt"].

    Args:
        fires: Monitor fires.
        ohlc_df: OHLC frame.
        outcomes: Fire outcomes; only the "dt" column is read, to derive the
            holdout split.
        last_price: Mark price for open positions.
        out_path: Output path; defaults to MATCHING_HYBRID_DD_CALIBRATION_JSON.

    Returns:
        The calibrate_hybrid_dd_thresholds result that was written.
    """
    # Work on a copy so the caller's frame does not gain the helper column.
    timed = outcomes.copy()
    timed["ts"] = pd.to_datetime(timed["dt"])
    split_at = timed["ts"].quantile(1.0 - MATCH_HOLDOUT_RATIO)

    calibration = calibrate_hybrid_dd_thresholds(
        fires,
        ohlc_df,
        holdout_start=split_at,
        last_price=last_price,
    )

    destination = out_path or MATCHING_HYBRID_DD_CALIBRATION_JSON
    destination.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(calibration, ensure_ascii=False, indent=2)
    destination.write_text(serialized, encoding="utf-8")
    return calibration