""" WLD 과거 봉을 빗썸 API에서 받아 coins.db에 저장합니다. - 최초: 최근 N개월 전량 적재 - 이후: DB 마지막 시각 **이후** 봉만 추가 (증분) """ from __future__ import annotations import sqlite3 from datetime import datetime import pandas as pd from dateutil.relativedelta import relativedelta from config import ( BITHUMB_MINUTE_INTERVALS, COIN_NAME, DAILY_INTERVAL_MIN, DB_PATH, DOWNLOAD_BACKFILL_EXTRA_BARS, DOWNLOAD_DAILY_EXTRA_DAYS, DOWNLOAD_INTERVALS, DOWNLOAD_MIN_INCREMENTAL_BARS, DOWNLOAD_MONTHS, DOWNLOAD_MONTHS_1M, INCREMENTAL_OVERLAP_BARS, KR_COINS, SYMBOL, ) from deepcoin.ops.monitor import Monitor def bong_count_for_months(interval_minutes: int, months: int) -> int: """N개월치 봉 개수(여유분 포함).""" days = months * 30 if interval_minutes >= DAILY_INTERVAL_MIN: return days + DOWNLOAD_DAILY_EXTRA_DAYS bars_per_day = (24 * 60) // interval_minutes return days * bars_per_day + DOWNLOAD_BACKFILL_EXTRA_BARS def bong_count_since( interval_minutes: int, last_ts: pd.Timestamp, overlap: int = INCREMENTAL_OVERLAP_BARS ) -> int: """마지막 저장 시각 이후 필요한 API 봉 수(겹침 포함).""" now = pd.Timestamp.now() if last_ts.tzinfo is not None and now.tzinfo is None: last_ts = last_ts.tz_localize(None) delta_min = max(0, (now - last_ts).total_seconds() / 60) bars = int(delta_min / interval_minutes) + overlap + 10 return max(bars, DOWNLOAD_MIN_INCREMENTAL_BARS) def months_cutoff(months: int) -> pd.Timestamp: """N개월 전 시각.""" return pd.Timestamp(datetime.now() - relativedelta(months=months)) def trim_to_recent_months(data: pd.DataFrame, months: int) -> pd.DataFrame: """최근 N개월 구간만 남깁니다.""" if data is None or data.empty: return data cutoff = months_cutoff(months) if not isinstance(data.index, pd.DatetimeIndex): data = data.copy() data.index = pd.to_datetime(data.index) return data[data.index >= cutoff].copy() def interval_label(interval: int) -> str: if interval >= 1440: return "일봉(1440)" return f"{interval}분봉" def months_for_interval(interval: int, default_months: int) -> int: """간격별 DB 보관 개월 수 (1분봉은 별도 상한).""" if interval == 1: return DOWNLOAD_MONTHS_1M return default_months def download_jobs() -> list[tuple[int, str]]: labels = { 1: "1분", 3: "3분", 5: "5분", 10: "10분", 15: "15분", 30: "30분", 60: "60분(1시간)", 240: "240분(4시간)", 1440: "1440분(1일)", } jobs = [] for iv in DOWNLOAD_INTERVALS: if iv < 1440 and iv not in BITHUMB_MINUTE_INTERVALS: print(f"경고: {iv}분봉은 빗썸 API 미지원 — 건너뜀") continue jobs.append((iv, labels.get(iv, f"{iv}분"))) return jobs def ensure_table(cursor, table_name: str) -> None: cursor.execute( f"CREATE TABLE IF NOT EXISTS {table_name} " "(CODE text, NAME text, ymdhms datetime, ymd text, hms text, " "Close REAL, Open REAL, High REAL, Low REAL, Volume REAL)" ) cursor.execute( f"CREATE INDEX IF NOT EXISTS {table_name}_idx ON {table_name}(CODE, ymdhms)" ) def get_earliest_timestamp( symbol: str, interval: int, db_path: str = DB_PATH ) -> pd.Timestamp | None: """테이블에 저장된 해당 심볼의 가장 오래된 봉 시각.""" conn = sqlite3.connect(db_path) cursor = conn.cursor() table_name = f"{symbol}_{interval}" ensure_table(cursor, table_name) cursor.execute( f"SELECT MIN(ymdhms) FROM {table_name} WHERE CODE = ?", (symbol,), ) row = cursor.fetchone() conn.close() if row and row[0]: return pd.Timestamp(row[0]) return None def get_last_timestamp( symbol: str, interval: int, db_path: str = DB_PATH ) -> pd.Timestamp | None: """테이블에 저장된 해당 심볼의 마지막 봉 시각.""" conn = sqlite3.connect(db_path) cursor = conn.cursor() table_name = f"{symbol}_{interval}" ensure_table(cursor, table_name) cursor.execute( f"SELECT MAX(ymdhms) FROM {table_name} WHERE CODE = ?", (symbol,), ) row = cursor.fetchone() conn.close() if row and row[0]: return pd.Timestamp(row[0]) return None def get_row_count(symbol: str, interval: int, db_path: str = DB_PATH) -> int: """저장된 봉 개수.""" conn = sqlite3.connect(db_path) cursor = conn.cursor() table_name = f"{symbol}_{interval}" ensure_table(cursor, table_name) cursor.execute( f"SELECT COUNT(*) FROM {table_name} WHERE CODE = ?", (symbol,), ) row = cursor.fetchone() conn.close() return int(row[0]) if row else 0 def filter_after_last( data: pd.DataFrame, last_ts: pd.Timestamp | None ) -> pd.DataFrame: """마지막 저장 시각보다 이후(>)인 봉만 반환.""" if data is None or data.empty or last_ts is None: return data if not isinstance(data.index, pd.DatetimeIndex): data = data.copy() data.index = pd.to_datetime(data.index) last = pd.Timestamp(last_ts) return data[data.index > last].copy() def prune_before_cutoff( symbol: str, interval: int, months: int, db_path: str = DB_PATH ) -> int: """N개월보다 오래된 봉 삭제 (DB 용량 유지).""" cutoff = months_cutoff(months).strftime("%Y-%m-%d %H:%M:%S") conn = sqlite3.connect(db_path) cursor = conn.cursor() table_name = f"{symbol}_{interval}" ensure_table(cursor, table_name) cursor.execute( f"DELETE FROM {table_name} WHERE CODE = ? AND ymdhms < ?", (symbol, cutoff), ) deleted = cursor.rowcount conn.commit() cursor.close() conn.close() return deleted def append_data( symbol: str, interval: int, data: pd.DataFrame, last_ts: pd.Timestamp | None = None, db_path: str = DB_PATH, ) -> tuple[int, int]: """ 마지막 시각 이후 봉만 INSERT합니다. 기존 데이터는 삭제하지 않습니다. Args: last_ts: None이면 전체 data 적재, 있으면 index > last_ts 만 적재 Returns: (추가된 행 수, 스킵된 행 수) """ if data is None or data.empty: return 0, 0 total = len(data) to_save = data if last_ts is None else filter_after_last(data, last_ts) skipped = total - len(to_save) if to_save.empty: return 0, skipped conn = sqlite3.connect(db_path) cursor = conn.cursor() table_name = f"{symbol}_{interval}" ensure_table(cursor, table_name) records = [] for i in range(len(to_save)): ts = to_save.index[i] if hasattr(ts, "to_pydatetime"): ts = ts.to_pydatetime() ymd = ts.strftime("%Y%m%d") hms = ts.strftime("%H%M%S") ymdhms = ts.strftime("%Y-%m-%d %H:%M:%S") records.append( ( symbol, KR_COINS[symbol], ymdhms, ymd, hms, float(to_save["Open"].iloc[i]), float(to_save["High"].iloc[i]), float(to_save["Low"].iloc[i]), float(to_save["Close"].iloc[i]), float(to_save["Volume"].iloc[i]), ) ) cursor.executemany( f"INSERT INTO {table_name} " "(CODE, NAME, ymdhms, ymd, hms, Close, Open, High, Low, Volume) " "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)", records, ) conn.commit() cursor.close() conn.close() return len(records), skipped def backfill_before_earliest( monitor: Monitor, symbol: str, interval: int, months: int, ) -> int: """ DB 최초 봉보다 오래된 구간을 API로 채웁니다 (1년 적재 시 필요). Returns: 추가된 행 수. """ months = months_for_interval(interval, months) cutoff = months_cutoff(months) earliest = get_earliest_timestamp(symbol, interval) if earliest is None or earliest <= cutoff: return 0 label = interval_label(interval) # now부터 역순 수집이므로 cutoff까지 닿으려면 N개월 전체 봉 수가 필요 target = bong_count_for_months(interval, months) print( f" [백필] {label} — {cutoff.date()} ~ {earliest} " f"(API 역수집 약 {target}봉)" ) data = monitor.get_coin_more_data( symbol, interval, bong_count=target, verbose=True ) if data is None or data.empty: print(" -> 백필 API 데이터 없음") return 0 if not isinstance(data.index, pd.DatetimeIndex): data.index = pd.to_datetime(data.index) hist = data[(data.index >= cutoff) & (data.index < earliest)].copy() if hist.empty: print(" -> 백필 대상 구간 없음") return 0 inserted, skipped = append_data(symbol, interval, hist, last_ts=None) print(f" -> 백필 추가 {inserted}행 (스킵 {skipped})") return inserted def download_symbol( monitor: Monitor, symbol: str, interval: int, months: int, *, verbose: bool = True, skip_backfill: bool = False, ) -> None: """ 한 간격의 봉을 API로 받아 증분·백필 저장합니다. Args: skip_backfill: True면 운영 증분만(백필 생략). 05/06 시작 동기화용. """ months = months_for_interval(interval, months) label = interval_label(interval) existing = get_row_count(symbol, interval) if existing > 0 and not skip_backfill: backfill_before_earliest(monitor, symbol, interval, months) last_ts = get_last_timestamp(symbol, interval) if last_ts is None: target = bong_count_for_months(interval, months) mode = "초기 적재" else: target = min( bong_count_since(interval, last_ts), bong_count_for_months(interval, months), ) mode = f"증분 (마지막 {last_ts.strftime('%Y-%m-%d %H:%M:%S')} 이후)" print(f"\n[{symbol}] {label} — {mode}") print(f" DB 기존 {existing}행 | API 목표 약 {target}봉") data = monitor.get_coin_more_data( symbol, interval, bong_count=target, verbose=verbose ) if data is None or data.empty: print(" -> API 데이터 없음") return data = trim_to_recent_months(data, months) if data.empty: print(" -> 최근 N개월 필터 후 데이터 없음") return inserted, skipped = append_data(symbol, interval, data, last_ts=last_ts) pruned = prune_before_cutoff(symbol, interval, months) new_last = get_last_timestamp(symbol, interval) total = get_row_count(symbol, interval) print(f" -> API {len(data)}봉 | 추가 {inserted}행 | 스킵(기존) {skipped}행") if pruned > 0: print(f" -> {months}개월 이전 {pruned}행 정리") if new_last is not None: print(f" -> DB 합계 {total}행 | {data.index[0]} ~ {new_last}") def download(months: int | None = None) -> None: """ WLD 다중 분봉·일봉을 coins.db에 증분 적재합니다. 간격: config.DOWNLOAD_INTERVALS """ months = months or DOWNLOAD_MONTHS monitor = Monitor(cooldown_file=None) jobs = download_jobs() intervals_str = ", ".join(str(iv) for iv, _ in jobs) print(f"=== {COIN_NAME} ({SYMBOL}) -> {DB_PATH} (증분 INSERT) ===") print(f"보관 {months}개월 | 간격(분): {intervals_str}") started = datetime.now() for interval, desc in jobs: print(f"\n--- {desc} ---") try: download_symbol(monitor, SYMBOL, interval, months) except Exception as e: print(f"오류 interval={interval}: {e}") elapsed = datetime.now() - started print(f"\n완료 (소요: {elapsed})") if __name__ == "__main__": download()