feat: persist uploads under resources
업로드 파일을 resources/uploads에 저장하고, 파일명을 안전한 형태로 정리합니다. resources 디렉터리는 git 추적에서 제외하고 uploads/.gitkeep만 추적합니다.

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
69
app/main.py
69
app/main.py
@@ -6,6 +6,7 @@ import os
|
||||
import tempfile
|
||||
import threading
|
||||
import time
|
||||
import re
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
from uuid import uuid4
|
||||
@@ -23,7 +24,9 @@ from .stt import transcribe_file, transcribe_iter
|
||||
load_dotenv()
|
||||
|
||||
APP_ROOT = Path(__file__).resolve().parent
|
||||
PROJECT_ROOT = APP_ROOT.parent
|
||||
STATIC_DIR = APP_ROOT / "static"
|
||||
UPLOAD_DIR = PROJECT_ROOT / "resources" / "uploads"
|
||||
|
||||
ALLOWED_EXTS = {".mp3", ".m4a", ".wav", ".mp4", ".aac", ".ogg", ".flac", ".webm"}
|
||||
ALLOWED_MIME_PREFIXES = ("audio/",)
|
||||
@@ -47,6 +50,7 @@ app.mount("/static", StaticFiles(directory=str(STATIC_DIR)), name="static")
|
||||
def _startup() -> None:
|
||||
# .env 기반으로 DB 테이블 자동 생성
|
||||
db.init_db()
|
||||
UPLOAD_DIR.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
|
||||
@dataclasses.dataclass
|
||||
@@ -128,17 +132,17 @@ async def api_create_job(
|
||||
) -> dict[str, Any]:
|
||||
_cleanup_jobs()
|
||||
_validate_upload(file)
|
||||
tmp_path = await _save_upload(file)
|
||||
job_id = str(uuid4())
|
||||
saved_path = await _save_upload(file, file_id=job_id)
|
||||
|
||||
lang = language.strip().lower()
|
||||
if lang in ("", "auto"):
|
||||
lang = ""
|
||||
|
||||
job_id = str(uuid4())
|
||||
job = _Job(
|
||||
job_id=job_id,
|
||||
filename=file.filename,
|
||||
tmp_path=tmp_path,
|
||||
tmp_path=saved_path,
|
||||
language=(lang or None),
|
||||
vad_filter=bool(vad_filter),
|
||||
beam_size=int(beam_size),
|
||||
@@ -188,28 +192,14 @@ async def api_transcribe(
|
||||
) -> dict[str, Any]:
|
||||
_validate_upload(file)
|
||||
|
||||
suffix = Path(file.filename or "").suffix.lower() or ".bin"
|
||||
with tempfile.NamedTemporaryFile(prefix="stt_", suffix=suffix, delete=False) as tmp:
|
||||
tmp_path = tmp.name
|
||||
total = 0
|
||||
while True:
|
||||
chunk = await file.read(1024 * 1024)
|
||||
if not chunk:
|
||||
break
|
||||
total += len(chunk)
|
||||
if total > MAX_UPLOAD_BYTES:
|
||||
raise HTTPException(
|
||||
status_code=413,
|
||||
detail=f"파일이 너무 큽니다. 최대 {MAX_UPLOAD_MB}MB 까지 업로드 가능합니다.",
|
||||
)
|
||||
tmp.write(chunk)
|
||||
|
||||
try:
|
||||
file_id = str(uuid4())
|
||||
saved_path = await _save_upload(file, file_id=file_id)
|
||||
lang = language.strip().lower()
|
||||
if lang in ("", "auto"):
|
||||
lang = ""
|
||||
result = transcribe_file(
|
||||
tmp_path,
|
||||
saved_path,
|
||||
language=(lang or None),
|
||||
vad_filter=bool(vad_filter),
|
||||
beam_size=int(beam_size),
|
||||
@@ -231,10 +221,8 @@ async def api_transcribe(
|
||||
pass
|
||||
return result
|
||||
finally:
|
||||
try:
|
||||
os.remove(tmp_path)
|
||||
except OSError:
|
||||
pass
|
||||
# 업로드 파일은 resources/uploads 아래에 보관 (삭제하지 않음)
|
||||
pass
|
||||
|
||||
|
||||
@app.get("/healthz")
|
||||
@@ -308,10 +296,28 @@ def _validate_upload(file: UploadFile) -> None:
|
||||
)
|
||||
|
||||
|
||||
async def _save_upload(file: UploadFile) -> str:
|
||||
suffix = Path(file.filename or "").suffix.lower() or ".bin"
|
||||
with tempfile.NamedTemporaryFile(prefix="stt_", suffix=suffix, delete=False) as tmp:
|
||||
tmp_path = tmp.name
|
||||
_FILENAME_SAFE_RE = re.compile(r"[^A-Za-z0-9._-]+")
|
||||
|
||||
|
||||
def _safe_filename(name: str) -> str:
|
||||
base = Path(name).name # path traversal 방지
|
||||
base = base.strip().replace(" ", "_")
|
||||
base = _FILENAME_SAFE_RE.sub("_", base)
|
||||
if not base:
|
||||
return "upload.bin"
|
||||
if len(base) > 120:
|
||||
stem = Path(base).stem[:100]
|
||||
suf = Path(base).suffix[:20]
|
||||
base = f"{stem}{suf}"
|
||||
return base
|
||||
|
||||
|
||||
async def _save_upload(file: UploadFile, *, file_id: str) -> str:
|
||||
UPLOAD_DIR.mkdir(parents=True, exist_ok=True)
|
||||
safe = _safe_filename(file.filename or "upload.bin")
|
||||
out_path = UPLOAD_DIR / f"{file_id}_{safe}"
|
||||
tmp_path = str(out_path)
|
||||
with open(tmp_path, "wb") as tmp:
|
||||
total = 0
|
||||
while True:
|
||||
chunk = await file.read(1024 * 1024)
|
||||
@@ -473,9 +479,6 @@ def _run_job(job_id: str) -> None:
|
||||
except Exception:
|
||||
pass
|
||||
finally:
|
||||
if tmp_path:
|
||||
try:
|
||||
os.remove(tmp_path)
|
||||
except OSError:
|
||||
pass
|
||||
# 업로드 파일은 resources/uploads 아래에 보관 (삭제하지 않음)
|
||||
pass
|
||||
|
||||
|
||||
Reference in New Issue
Block a user