feat: persist uploads under resources

업로드 파일을 resources/uploads에 저장하고 파일명 안전화 처리합니다.
resources 디렉터리는 git에서 제외하고 uploads/.gitkeep만 추적합니다.

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
dsyoon
2026-02-09 18:43:03 +09:00
parent 42feb4a0fa
commit b18c9172a4
3 changed files with 43 additions and 33 deletions

View File

@@ -6,6 +6,7 @@ import os
import tempfile
import threading
import time
import re
from pathlib import Path
from typing import Any
from uuid import uuid4
@@ -23,7 +24,9 @@ from .stt import transcribe_file, transcribe_iter
load_dotenv()
APP_ROOT = Path(__file__).resolve().parent
PROJECT_ROOT = APP_ROOT.parent
STATIC_DIR = APP_ROOT / "static"
UPLOAD_DIR = PROJECT_ROOT / "resources" / "uploads"
ALLOWED_EXTS = {".mp3", ".m4a", ".wav", ".mp4", ".aac", ".ogg", ".flac", ".webm"}
ALLOWED_MIME_PREFIXES = ("audio/",)
@@ -47,6 +50,7 @@ app.mount("/static", StaticFiles(directory=str(STATIC_DIR)), name="static")
def _startup() -> None:
# Auto-create the DB tables (settings are taken from .env).
db.init_db()
# Create the upload directory, including missing parents; an already
# existing directory is not treated as an error.
UPLOAD_DIR.mkdir(parents=True, exist_ok=True)
@dataclasses.dataclass
@@ -128,17 +132,17 @@ async def api_create_job(
) -> dict[str, Any]:
_cleanup_jobs()
_validate_upload(file)
tmp_path = await _save_upload(file)
job_id = str(uuid4())
saved_path = await _save_upload(file, file_id=job_id)
lang = language.strip().lower()
if lang in ("", "auto"):
lang = ""
job_id = str(uuid4())
job = _Job(
job_id=job_id,
filename=file.filename,
tmp_path=tmp_path,
tmp_path=saved_path,
language=(lang or None),
vad_filter=bool(vad_filter),
beam_size=int(beam_size),
@@ -188,28 +192,14 @@ async def api_transcribe(
) -> dict[str, Any]:
_validate_upload(file)
suffix = Path(file.filename or "").suffix.lower() or ".bin"
with tempfile.NamedTemporaryFile(prefix="stt_", suffix=suffix, delete=False) as tmp:
tmp_path = tmp.name
total = 0
while True:
chunk = await file.read(1024 * 1024)
if not chunk:
break
total += len(chunk)
if total > MAX_UPLOAD_BYTES:
raise HTTPException(
status_code=413,
detail=f"파일이 너무 큽니다. 최대 {MAX_UPLOAD_MB}MB 까지 업로드 가능합니다.",
)
tmp.write(chunk)
try:
file_id = str(uuid4())
saved_path = await _save_upload(file, file_id=file_id)
lang = language.strip().lower()
if lang in ("", "auto"):
lang = ""
result = transcribe_file(
tmp_path,
saved_path,
language=(lang or None),
vad_filter=bool(vad_filter),
beam_size=int(beam_size),
@@ -231,10 +221,8 @@ async def api_transcribe(
pass
return result
finally:
try:
os.remove(tmp_path)
except OSError:
pass
# 업로드 파일은 resources/uploads 아래에 보관 (삭제하지 않음)
pass
@app.get("/healthz")
@@ -308,10 +296,28 @@ def _validate_upload(file: UploadFile) -> None:
)
async def _save_upload(file: UploadFile) -> str:
suffix = Path(file.filename or "").suffix.lower() or ".bin"
with tempfile.NamedTemporaryFile(prefix="stt_", suffix=suffix, delete=False) as tmp:
tmp_path = tmp.name
# Any run of characters outside this conservative ASCII set is collapsed
# into a single underscore.
_FILENAME_SAFE_RE = re.compile(r"[^A-Za-z0-9._-]+")


def _safe_filename(name: str) -> str:
    """Return a filesystem-safe file name derived from a client-supplied one.

    Directory components are dropped (both ``/`` and ``\\`` are treated as
    separators), surrounding whitespace is stripped, spaces become
    underscores, and every run of characters outside ``A-Za-z0-9._-`` is
    replaced by a single underscore.

    Args:
        name: The file name as sent by the client; may contain path
            components or arbitrary characters.

    Returns:
        A sanitized name of at most 120 characters. ``"upload.bin"`` is
        returned when nothing usable remains (empty input, or a name made
        only of dots such as ``"."`` / ``".."``).
    """
    # Some clients send Windows-style full paths. On a POSIX host Path()
    # does not split on backslashes, so normalise them first; otherwise the
    # directory parts would leak into the stored name.
    base = Path(name.replace("\\", "/")).name  # prevents path traversal
    base = base.strip().replace(" ", "_")
    base = _FILENAME_SAFE_RE.sub("_", base)
    # Dots-only results ("..", "...") are directory references, not file
    # names, so they get the same fallback as an empty name.
    if not base.strip("."):
        return "upload.bin"
    if len(base) > 120:
        # Keep the extension recognisable while bounding the total length:
        # at most 100 characters of stem plus at most 20 of suffix.
        stem = Path(base).stem[:100]
        suf = Path(base).suffix[:20]
        base = f"{stem}{suf}"
    return base
async def _save_upload(file: UploadFile, *, file_id: str) -> str:
UPLOAD_DIR.mkdir(parents=True, exist_ok=True)
safe = _safe_filename(file.filename or "upload.bin")
out_path = UPLOAD_DIR / f"{file_id}_{safe}"
tmp_path = str(out_path)
with open(tmp_path, "wb") as tmp:
total = 0
while True:
chunk = await file.read(1024 * 1024)
@@ -473,9 +479,6 @@ def _run_job(job_id: str) -> None:
except Exception:
pass
finally:
if tmp_path:
try:
os.remove(tmp_path)
except OSError:
pass
# 업로드 파일은 resources/uploads 아래에 보관 (삭제하지 않음)
pass