diff --git a/.gitignore b/.gitignore index bee9b00..ecc08ef 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,9 @@ __pycache__/ # optional local artifacts *.log + +# resources (업로드/아티팩트는 git에서 제외) +resources/* +!resources/uploads/ +resources/uploads/* +!resources/uploads/.gitkeep diff --git a/app/main.py b/app/main.py index 822c0e4..9abe7aa 100644 --- a/app/main.py +++ b/app/main.py @@ -6,6 +6,7 @@ import os import tempfile import threading import time +import re from pathlib import Path from typing import Any from uuid import uuid4 @@ -23,7 +24,9 @@ from .stt import transcribe_file, transcribe_iter load_dotenv() APP_ROOT = Path(__file__).resolve().parent +PROJECT_ROOT = APP_ROOT.parent STATIC_DIR = APP_ROOT / "static" +UPLOAD_DIR = PROJECT_ROOT / "resources" / "uploads" ALLOWED_EXTS = {".mp3", ".m4a", ".wav", ".mp4", ".aac", ".ogg", ".flac", ".webm"} ALLOWED_MIME_PREFIXES = ("audio/",) @@ -47,6 +50,7 @@ app.mount("/static", StaticFiles(directory=str(STATIC_DIR)), name="static") def _startup() -> None: # .env 기반으로 DB 테이블 자동 생성 db.init_db() + UPLOAD_DIR.mkdir(parents=True, exist_ok=True) @dataclasses.dataclass @@ -128,17 +132,17 @@ async def api_create_job( ) -> dict[str, Any]: _cleanup_jobs() _validate_upload(file) - tmp_path = await _save_upload(file) + job_id = str(uuid4()) + saved_path = await _save_upload(file, file_id=job_id) lang = language.strip().lower() if lang in ("", "auto"): lang = "" - job_id = str(uuid4()) job = _Job( job_id=job_id, filename=file.filename, - tmp_path=tmp_path, + tmp_path=saved_path, language=(lang or None), vad_filter=bool(vad_filter), beam_size=int(beam_size), @@ -188,28 +192,14 @@ async def api_transcribe( ) -> dict[str, Any]: _validate_upload(file) - suffix = Path(file.filename or "").suffix.lower() or ".bin" - with tempfile.NamedTemporaryFile(prefix="stt_", suffix=suffix, delete=False) as tmp: - tmp_path = tmp.name - total = 0 - while True: - chunk = await file.read(1024 * 1024) - if not chunk: - break - total += len(chunk) - if total > MAX_UPLOAD_BYTES: - raise HTTPException( - status_code=413, - detail=f"파일이 너무 큽니다. 최대 {MAX_UPLOAD_MB}MB 까지 업로드 가능합니다.", - ) - tmp.write(chunk) - try: + file_id = str(uuid4()) + saved_path = await _save_upload(file, file_id=file_id) lang = language.strip().lower() if lang in ("", "auto"): lang = "" result = transcribe_file( - tmp_path, + saved_path, language=(lang or None), vad_filter=bool(vad_filter), beam_size=int(beam_size), @@ -231,10 +221,8 @@ async def api_transcribe( pass return result finally: - try: - os.remove(tmp_path) - except OSError: - pass + # 업로드 파일은 resources/uploads 아래에 보관 (삭제하지 않음) + pass @app.get("/healthz") @@ -308,10 +296,28 @@ def _validate_upload(file: UploadFile) -> None: ) -async def _save_upload(file: UploadFile) -> str: - suffix = Path(file.filename or "").suffix.lower() or ".bin" - with tempfile.NamedTemporaryFile(prefix="stt_", suffix=suffix, delete=False) as tmp: - tmp_path = tmp.name +_FILENAME_SAFE_RE = re.compile(r"[^A-Za-z0-9._-]+") + + +def _safe_filename(name: str) -> str: + base = Path(name).name # path traversal 방지 + base = base.strip().replace(" ", "_") + base = _FILENAME_SAFE_RE.sub("_", base) + if not base: + return "upload.bin" + if len(base) > 120: + stem = Path(base).stem[:100] + suf = Path(base).suffix[:20] + base = f"{stem}{suf}" + return base + + +async def _save_upload(file: UploadFile, *, file_id: str) -> str: + UPLOAD_DIR.mkdir(parents=True, exist_ok=True) + safe = _safe_filename(file.filename or "upload.bin") + out_path = UPLOAD_DIR / f"{file_id}_{safe}" + tmp_path = str(out_path) + with open(tmp_path, "wb") as tmp: total = 0 while True: chunk = await file.read(1024 * 1024) @@ -473,9 +479,6 @@ def _run_job(job_id: str) -> None: except Exception: pass finally: - if tmp_path: - try: - os.remove(tmp_path) - except OSError: - pass + # 업로드 파일은 resources/uploads 아래에 보관 (삭제하지 않음) + pass diff --git a/resources/uploads/.gitkeep b/resources/uploads/.gitkeep new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/resources/uploads/.gitkeep @@ -0,0 +1 @@ +