feat: persist uploads under resources

업로드 파일을 resources/uploads에 저장하고, 파일명을 안전한 문자로 정규화합니다.
resources 디렉터리는 git에서 제외하고 uploads/.gitkeep만 추적합니다.

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
dsyoon
2026-02-09 18:43:03 +09:00
parent 42feb4a0fa
commit b18c9172a4
3 changed files with 43 additions and 33 deletions

6
.gitignore vendored
View File

@@ -6,3 +6,9 @@ __pycache__/
# optional local artifacts # optional local artifacts
*.log *.log
# resources (업로드/아티팩트는 git에서 제외)
resources/*
!resources/uploads/
resources/uploads/*
!resources/uploads/.gitkeep

View File

@@ -6,6 +6,7 @@ import os
import tempfile import tempfile
import threading import threading
import time import time
import re
from pathlib import Path from pathlib import Path
from typing import Any from typing import Any
from uuid import uuid4 from uuid import uuid4
@@ -23,7 +24,9 @@ from .stt import transcribe_file, transcribe_iter
load_dotenv() load_dotenv()
APP_ROOT = Path(__file__).resolve().parent APP_ROOT = Path(__file__).resolve().parent
PROJECT_ROOT = APP_ROOT.parent
STATIC_DIR = APP_ROOT / "static" STATIC_DIR = APP_ROOT / "static"
UPLOAD_DIR = PROJECT_ROOT / "resources" / "uploads"
ALLOWED_EXTS = {".mp3", ".m4a", ".wav", ".mp4", ".aac", ".ogg", ".flac", ".webm"} ALLOWED_EXTS = {".mp3", ".m4a", ".wav", ".mp4", ".aac", ".ogg", ".flac", ".webm"}
ALLOWED_MIME_PREFIXES = ("audio/",) ALLOWED_MIME_PREFIXES = ("audio/",)
@@ -47,6 +50,7 @@ app.mount("/static", StaticFiles(directory=str(STATIC_DIR)), name="static")
def _startup() -> None: def _startup() -> None:
# .env 기반으로 DB 테이블 자동 생성 # .env 기반으로 DB 테이블 자동 생성
db.init_db() db.init_db()
UPLOAD_DIR.mkdir(parents=True, exist_ok=True)
@dataclasses.dataclass @dataclasses.dataclass
@@ -128,17 +132,17 @@ async def api_create_job(
) -> dict[str, Any]: ) -> dict[str, Any]:
_cleanup_jobs() _cleanup_jobs()
_validate_upload(file) _validate_upload(file)
tmp_path = await _save_upload(file) job_id = str(uuid4())
saved_path = await _save_upload(file, file_id=job_id)
lang = language.strip().lower() lang = language.strip().lower()
if lang in ("", "auto"): if lang in ("", "auto"):
lang = "" lang = ""
job_id = str(uuid4())
job = _Job( job = _Job(
job_id=job_id, job_id=job_id,
filename=file.filename, filename=file.filename,
tmp_path=tmp_path, tmp_path=saved_path,
language=(lang or None), language=(lang or None),
vad_filter=bool(vad_filter), vad_filter=bool(vad_filter),
beam_size=int(beam_size), beam_size=int(beam_size),
@@ -188,28 +192,14 @@ async def api_transcribe(
) -> dict[str, Any]: ) -> dict[str, Any]:
_validate_upload(file) _validate_upload(file)
suffix = Path(file.filename or "").suffix.lower() or ".bin"
with tempfile.NamedTemporaryFile(prefix="stt_", suffix=suffix, delete=False) as tmp:
tmp_path = tmp.name
total = 0
while True:
chunk = await file.read(1024 * 1024)
if not chunk:
break
total += len(chunk)
if total > MAX_UPLOAD_BYTES:
raise HTTPException(
status_code=413,
detail=f"파일이 너무 큽니다. 최대 {MAX_UPLOAD_MB}MB 까지 업로드 가능합니다.",
)
tmp.write(chunk)
try: try:
file_id = str(uuid4())
saved_path = await _save_upload(file, file_id=file_id)
lang = language.strip().lower() lang = language.strip().lower()
if lang in ("", "auto"): if lang in ("", "auto"):
lang = "" lang = ""
result = transcribe_file( result = transcribe_file(
tmp_path, saved_path,
language=(lang or None), language=(lang or None),
vad_filter=bool(vad_filter), vad_filter=bool(vad_filter),
beam_size=int(beam_size), beam_size=int(beam_size),
@@ -231,10 +221,8 @@ async def api_transcribe(
pass pass
return result return result
finally: finally:
try: # 업로드 파일은 resources/uploads 아래에 보관 (삭제하지 않음)
os.remove(tmp_path) pass
except OSError:
pass
@app.get("/healthz") @app.get("/healthz")
@@ -308,10 +296,28 @@ def _validate_upload(file: UploadFile) -> None:
) )
async def _save_upload(file: UploadFile) -> str: _FILENAME_SAFE_RE = re.compile(r"[^A-Za-z0-9._-]+")
suffix = Path(file.filename or "").suffix.lower() or ".bin"
with tempfile.NamedTemporaryFile(prefix="stt_", suffix=suffix, delete=False) as tmp:
tmp_path = tmp.name def _safe_filename(name: str) -> str:
base = Path(name).name # path traversal 방지
base = base.strip().replace(" ", "_")
base = _FILENAME_SAFE_RE.sub("_", base)
if not base:
return "upload.bin"
if len(base) > 120:
stem = Path(base).stem[:100]
suf = Path(base).suffix[:20]
base = f"{stem}{suf}"
return base
async def _save_upload(file: UploadFile, *, file_id: str) -> str:
UPLOAD_DIR.mkdir(parents=True, exist_ok=True)
safe = _safe_filename(file.filename or "upload.bin")
out_path = UPLOAD_DIR / f"{file_id}_{safe}"
tmp_path = str(out_path)
with open(tmp_path, "wb") as tmp:
total = 0 total = 0
while True: while True:
chunk = await file.read(1024 * 1024) chunk = await file.read(1024 * 1024)
@@ -473,9 +479,6 @@ def _run_job(job_id: str) -> None:
except Exception: except Exception:
pass pass
finally: finally:
if tmp_path: # 업로드 파일은 resources/uploads 아래에 보관 (삭제하지 않음)
try: pass
os.remove(tmp_path)
except OSError:
pass

View File

@@ -0,0 +1 @@