feat: persist uploads under resources
업로드 파일을 resources/uploads에 저장하고 파일명 안전화 처리합니다. resources 디렉터리는 git에서 제외하고 uploads/.gitkeep만 추적합니다. Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
6
.gitignore
vendored
6
.gitignore
vendored
@@ -6,3 +6,9 @@ __pycache__/
|
|||||||
|
|
||||||
# optional local artifacts
|
# optional local artifacts
|
||||||
*.log
|
*.log
|
||||||
|
|
||||||
|
# resources (업로드/아티팩트는 git에서 제외)
|
||||||
|
resources/*
|
||||||
|
!resources/uploads/
|
||||||
|
resources/uploads/*
|
||||||
|
!resources/uploads/.gitkeep
|
||||||
|
|||||||
65
app/main.py
65
app/main.py
@@ -6,6 +6,7 @@ import os
|
|||||||
import tempfile
|
import tempfile
|
||||||
import threading
|
import threading
|
||||||
import time
|
import time
|
||||||
|
import re
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any
|
from typing import Any
|
||||||
from uuid import uuid4
|
from uuid import uuid4
|
||||||
@@ -23,7 +24,9 @@ from .stt import transcribe_file, transcribe_iter
|
|||||||
load_dotenv()
|
load_dotenv()
|
||||||
|
|
||||||
APP_ROOT = Path(__file__).resolve().parent
|
APP_ROOT = Path(__file__).resolve().parent
|
||||||
|
PROJECT_ROOT = APP_ROOT.parent
|
||||||
STATIC_DIR = APP_ROOT / "static"
|
STATIC_DIR = APP_ROOT / "static"
|
||||||
|
UPLOAD_DIR = PROJECT_ROOT / "resources" / "uploads"
|
||||||
|
|
||||||
ALLOWED_EXTS = {".mp3", ".m4a", ".wav", ".mp4", ".aac", ".ogg", ".flac", ".webm"}
|
ALLOWED_EXTS = {".mp3", ".m4a", ".wav", ".mp4", ".aac", ".ogg", ".flac", ".webm"}
|
||||||
ALLOWED_MIME_PREFIXES = ("audio/",)
|
ALLOWED_MIME_PREFIXES = ("audio/",)
|
||||||
@@ -47,6 +50,7 @@ app.mount("/static", StaticFiles(directory=str(STATIC_DIR)), name="static")
|
|||||||
def _startup() -> None:
|
def _startup() -> None:
|
||||||
# .env 기반으로 DB 테이블 자동 생성
|
# .env 기반으로 DB 테이블 자동 생성
|
||||||
db.init_db()
|
db.init_db()
|
||||||
|
UPLOAD_DIR.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
|
|
||||||
@dataclasses.dataclass
|
@dataclasses.dataclass
|
||||||
@@ -128,17 +132,17 @@ async def api_create_job(
|
|||||||
) -> dict[str, Any]:
|
) -> dict[str, Any]:
|
||||||
_cleanup_jobs()
|
_cleanup_jobs()
|
||||||
_validate_upload(file)
|
_validate_upload(file)
|
||||||
tmp_path = await _save_upload(file)
|
job_id = str(uuid4())
|
||||||
|
saved_path = await _save_upload(file, file_id=job_id)
|
||||||
|
|
||||||
lang = language.strip().lower()
|
lang = language.strip().lower()
|
||||||
if lang in ("", "auto"):
|
if lang in ("", "auto"):
|
||||||
lang = ""
|
lang = ""
|
||||||
|
|
||||||
job_id = str(uuid4())
|
|
||||||
job = _Job(
|
job = _Job(
|
||||||
job_id=job_id,
|
job_id=job_id,
|
||||||
filename=file.filename,
|
filename=file.filename,
|
||||||
tmp_path=tmp_path,
|
tmp_path=saved_path,
|
||||||
language=(lang or None),
|
language=(lang or None),
|
||||||
vad_filter=bool(vad_filter),
|
vad_filter=bool(vad_filter),
|
||||||
beam_size=int(beam_size),
|
beam_size=int(beam_size),
|
||||||
@@ -188,28 +192,14 @@ async def api_transcribe(
|
|||||||
) -> dict[str, Any]:
|
) -> dict[str, Any]:
|
||||||
_validate_upload(file)
|
_validate_upload(file)
|
||||||
|
|
||||||
suffix = Path(file.filename or "").suffix.lower() or ".bin"
|
|
||||||
with tempfile.NamedTemporaryFile(prefix="stt_", suffix=suffix, delete=False) as tmp:
|
|
||||||
tmp_path = tmp.name
|
|
||||||
total = 0
|
|
||||||
while True:
|
|
||||||
chunk = await file.read(1024 * 1024)
|
|
||||||
if not chunk:
|
|
||||||
break
|
|
||||||
total += len(chunk)
|
|
||||||
if total > MAX_UPLOAD_BYTES:
|
|
||||||
raise HTTPException(
|
|
||||||
status_code=413,
|
|
||||||
detail=f"파일이 너무 큽니다. 최대 {MAX_UPLOAD_MB}MB 까지 업로드 가능합니다.",
|
|
||||||
)
|
|
||||||
tmp.write(chunk)
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
file_id = str(uuid4())
|
||||||
|
saved_path = await _save_upload(file, file_id=file_id)
|
||||||
lang = language.strip().lower()
|
lang = language.strip().lower()
|
||||||
if lang in ("", "auto"):
|
if lang in ("", "auto"):
|
||||||
lang = ""
|
lang = ""
|
||||||
result = transcribe_file(
|
result = transcribe_file(
|
||||||
tmp_path,
|
saved_path,
|
||||||
language=(lang or None),
|
language=(lang or None),
|
||||||
vad_filter=bool(vad_filter),
|
vad_filter=bool(vad_filter),
|
||||||
beam_size=int(beam_size),
|
beam_size=int(beam_size),
|
||||||
@@ -231,9 +221,7 @@ async def api_transcribe(
|
|||||||
pass
|
pass
|
||||||
return result
|
return result
|
||||||
finally:
|
finally:
|
||||||
try:
|
# 업로드 파일은 resources/uploads 아래에 보관 (삭제하지 않음)
|
||||||
os.remove(tmp_path)
|
|
||||||
except OSError:
|
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
@@ -308,10 +296,28 @@ def _validate_upload(file: UploadFile) -> None:
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
async def _save_upload(file: UploadFile) -> str:
|
_FILENAME_SAFE_RE = re.compile(r"[^A-Za-z0-9._-]+")
|
||||||
suffix = Path(file.filename or "").suffix.lower() or ".bin"
|
|
||||||
with tempfile.NamedTemporaryFile(prefix="stt_", suffix=suffix, delete=False) as tmp:
|
|
||||||
tmp_path = tmp.name
|
def _safe_filename(name: str) -> str:
    """Return a filesystem-safe basename derived from a client-supplied name.

    Directory components are dropped, surrounding whitespace is stripped,
    spaces become underscores, and the result is passed through
    ``_FILENAME_SAFE_RE`` so that every run of characters outside
    ``A-Za-z0-9._-`` is replaced by a single underscore.

    Args:
        name: Filename as sent by the client; may include a directory path
            using either ``/`` or ``\\`` as the separator.

    Returns:
        The sanitized name, at most 120 characters long, or ``"upload.bin"``
        when the basename is empty after stripping whitespace.
    """
    # Path-traversal guard: keep only the final path component. Backslashes
    # are normalized first because Path splits on the host separator only,
    # so a Windows-style client path would otherwise survive intact on POSIX.
    base = Path(name.replace("\\", "/")).name
    base = base.strip().replace(" ", "_")
    base = _FILENAME_SAFE_RE.sub("_", base)
    if not base:
        return "upload.bin"
    if len(base) > 120:
        # Shorten the stem but keep (a bounded part of) the extension.
        trimmed = Path(base)
        base = f"{trimmed.stem[:100]}{trimmed.suffix[:20]}"
    return base
|
||||||
|
|
||||||
|
|
||||||
|
async def _save_upload(file: UploadFile, *, file_id: str) -> str:
|
||||||
|
UPLOAD_DIR.mkdir(parents=True, exist_ok=True)
|
||||||
|
safe = _safe_filename(file.filename or "upload.bin")
|
||||||
|
out_path = UPLOAD_DIR / f"{file_id}_{safe}"
|
||||||
|
tmp_path = str(out_path)
|
||||||
|
with open(tmp_path, "wb") as tmp:
|
||||||
total = 0
|
total = 0
|
||||||
while True:
|
while True:
|
||||||
chunk = await file.read(1024 * 1024)
|
chunk = await file.read(1024 * 1024)
|
||||||
@@ -473,9 +479,6 @@ def _run_job(job_id: str) -> None:
|
|||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
finally:
|
finally:
|
||||||
if tmp_path:
|
# 업로드 파일은 resources/uploads 아래에 보관 (삭제하지 않음)
|
||||||
try:
|
|
||||||
os.remove(tmp_path)
|
|
||||||
except OSError:
|
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|||||||
1
resources/uploads/.gitkeep
Normal file
1
resources/uploads/.gitkeep
Normal file
@@ -0,0 +1 @@
|
|||||||
|
|
||||||
Reference in New Issue
Block a user