commit 7adaa7310251254eaeabe95c3b78a9b72e606b00 Author: dsyoon Date: Wed Feb 25 19:07:56 2026 +0900 Initial commit after re-install diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ecc08ef --- /dev/null +++ b/.gitignore @@ -0,0 +1,14 @@ +.venv/ +__pycache__/ +*.pyc +.DS_Store +.env + +# optional local artifacts +*.log + +# resources (업로드/아티팩트는 git에서 제외) +resources/* +!resources/uploads/ +resources/uploads/* +!resources/uploads/.gitkeep diff --git a/README.md b/README.md new file mode 100644 index 0000000..cc8c235 --- /dev/null +++ b/README.md @@ -0,0 +1,66 @@ +# Web STT (mp3/m4a 업로드 → 텍스트 변환) + +## 구성 +- **백엔드**: FastAPI (업로드/검증/STT 수행) +- **STT 엔진**: `faster-whisper` (Whisper 모델) +- **프론트**: 단일 HTML (파일 선택 → 전사 → 결과 표시/다운로드) + +## 동작 개요 (pseudocode) + +```text +UI: + onSelect(file): + validate client-side (extension) + enable "전사" 버튼 + + onClickTranscribe(): + POST /api/transcribe (multipart/form-data, file, options) + show progress (업로드 중 / 처리 중) + render returned text + segments + allow download as .txt + +API: + POST /api/transcribe: + if no file -> 400 + validate mime/ext in allowed audio types -> 415 if not + save to temp file + run STT(model, language, vad_filter, beam_size, ...) + return { text, segments[], detected_language, duration_sec } + cleanup temp file +``` + +## 실행 + +### 1) miniconda `ncue` 환경 준비 + +이미 `ncue`가 있다면: +```bash +conda activate ncue +pip install -r requirements.txt +``` + +`ncue`가 없다면(권장): +```bash +conda env create -f environment.yml +conda activate ncue +``` + +### 2) ffmpeg +`environment.yml`로 설치하면 자동 포함됩니다. (수동 설치 시 아래) + +macOS (Homebrew): +```bash +brew install ffmpeg +``` + +### 3) 서버 실행 +```bash +uvicorn app.main:app --reload --host 127.0.0.1 --port 8025 +``` + +브라우저에서 `http://127.0.0.1:8025` 접속. + +## 옵션 +- **모델**: 기본 `small` (정확도/속도 균형). `APP_WHISPER_MODEL=base|small|medium|large-v3` 등으로 변경 가능 +- **디바이스**: 기본 CPU. 
Apple Silicon에서 Metal은 `faster-whisper` 단독으로는 제한이 있어 CPU 기본값을 권장 + diff --git a/app/__init__.py b/app/__init__.py new file mode 100644 index 0000000..fe16459 --- /dev/null +++ b/app/__init__.py @@ -0,0 +1,2 @@ +__all__ = [] + diff --git a/app/db.py b/app/db.py new file mode 100644 index 0000000..32ee401 --- /dev/null +++ b/app/db.py @@ -0,0 +1,229 @@ +from __future__ import annotations + +import os +import re +from typing import Any, Iterable + +import psycopg +from psycopg import sql +from psycopg.rows import dict_row +from psycopg.types.json import Json + + +_IDENT_RE = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*$") + + +def _table_name() -> str: + name = os.getenv("TABLE", "ncue_stt").strip() + if not _IDENT_RE.match(name): + raise RuntimeError("TABLE 환경변수가 올바르지 않습니다.") + return name + + +def _conninfo() -> str: + host = os.getenv("DB_HOST", "").strip() + port = os.getenv("DB_PORT", "5432").strip() + dbname = os.getenv("DB_NAME", "").strip() + user = os.getenv("DB_USER", "").strip() + password = os.getenv("DB_PASSWORD", "").strip() + sslmode = os.getenv("DB_SSLMODE", "").strip() # optional + + missing = [k for k, v in (("DB_HOST", host), ("DB_NAME", dbname), ("DB_USER", user), ("DB_PASSWORD", password)) if not v] + if missing: + raise RuntimeError(f"DB 환경변수 누락: {', '.join(missing)}") + + parts = [ + f"host={host}", + f"port={port}", + f"dbname={dbname}", + f"user={user}", + f"password={password}", + ] + if sslmode: + parts.append(f"sslmode={sslmode}") + return " ".join(parts) + + +def connect() -> psycopg.Connection[Any]: + return psycopg.connect(_conninfo(), row_factory=dict_row, connect_timeout=5) + + +def init_db() -> None: + table = _table_name() + create_sql = sql.SQL( + """ + CREATE TABLE IF NOT EXISTS {table} ( + id BIGSERIAL PRIMARY KEY, + author_id TEXT NOT NULL, + filename TEXT, + language_requested TEXT, + detected_language TEXT, + language_probability DOUBLE PRECISION, + duration_sec DOUBLE PRECISION, + status TEXT NOT NULL DEFAULT 'completed', + text TEXT NOT NULL DEFAULT '', + segments JSONB NOT NULL DEFAULT '[]'::jsonb, + created_at TIMESTAMPTZ NOT NULL DEFAULT now(), + updated_at TIMESTAMPTZ NOT NULL DEFAULT now() + ); + """ + ).format(table=sql.Identifier(table)) + + idx1 = sql.SQL("CREATE INDEX IF NOT EXISTS {idx} ON {table}(author_id);").format( + idx=sql.Identifier(f"{table}_author_id_idx"), table=sql.Identifier(table) + ) + idx2 = sql.SQL("CREATE INDEX IF NOT EXISTS {idx} ON {table}(created_at DESC);").format( + idx=sql.Identifier(f"{table}_created_at_idx"), table=sql.Identifier(table) + ) + + with connect() as conn: + with conn.cursor() as cur: + cur.execute(create_sql) + cur.execute(idx1) + cur.execute(idx2) + conn.commit() + + +def insert_record( + *, + author_id: str, + filename: str | None, + language_requested: str | None, + detected_language: str | None, + language_probability: float | None, + duration_sec: float | None, + status: str, + text: str, + segments: list[dict[str, Any]], +) -> int: + table = _table_name() + q = sql.SQL( + """ + INSERT INTO {table} + (author_id, filename, language_requested, detected_language, language_probability, duration_sec, status, text, segments) + VALUES + (%s, %s, %s, %s, %s, %s, %s, %s, %s) + RETURNING id; + """ + ).format(table=sql.Identifier(table)) + + with connect() as conn: + with conn.cursor() as cur: + cur.execute( + q, + ( + author_id, + filename, + language_requested, + detected_language, + language_probability, + duration_sec, + status, + text or "", + Json(segments or []), + ), + ) + row = cur.fetchone() + conn.commit() 
+ return int(row["id"]) + + +def list_records(*, limit: int = 50, offset: int = 0, author_id: str | None = None, q: str | None = None) -> dict[str, Any]: + table = _table_name() + limit = max(1, min(int(limit), 200)) + offset = max(0, int(offset)) + + where_parts: list[sql.SQL] = [] + params: list[Any] = [] + + if author_id: + where_parts.append(sql.SQL("author_id = %s")) + params.append(author_id) + if q: + where_parts.append(sql.SQL("(filename ILIKE %s OR text ILIKE %s)")) + params.extend([f"%{q}%", f"%{q}%"]) + + where_sql = sql.SQL("") + if where_parts: + where_sql = sql.SQL("WHERE ") + sql.SQL(" AND ").join(where_parts) + + count_q = sql.SQL("SELECT count(*)::bigint AS cnt FROM {table} {where};").format( + table=sql.Identifier(table), where=where_sql + ) + list_q = sql.SQL( + """ + SELECT id, author_id, filename, language_requested, detected_language, duration_sec, status, created_at, updated_at + FROM {table} + {where} + ORDER BY created_at DESC + LIMIT %s OFFSET %s; + """ + ).format(table=sql.Identifier(table), where=where_sql) + + with connect() as conn: + with conn.cursor() as cur: + cur.execute(count_q, params) + total = int(cur.fetchone()["cnt"]) + cur.execute(list_q, params + [limit, offset]) + rows = cur.fetchall() + return {"total": total, "items": rows} + + +def get_record(record_id: int) -> dict[str, Any] | None: + table = _table_name() + q = sql.SQL("SELECT * FROM {table} WHERE id = %s;").format(table=sql.Identifier(table)) + with connect() as conn: + with conn.cursor() as cur: + cur.execute(q, (int(record_id),)) + row = cur.fetchone() + return row + + +def update_record( + record_id: int, + *, + author_id: str | None = None, + text: str | None = None, + status: str | None = None, +) -> dict[str, Any] | None: + table = _table_name() + sets: list[sql.SQL] = [] + params: list[Any] = [] + + if author_id is not None: + sets.append(sql.SQL("author_id = %s")) + params.append(author_id) + if text is not None: + sets.append(sql.SQL("text = %s")) + params.append(text) + if status is not None: + sets.append(sql.SQL("status = %s")) + params.append(status) + + if not sets: + return get_record(int(record_id)) + + sets.append(sql.SQL("updated_at = now()")) + q = sql.SQL("UPDATE {table} SET {sets} WHERE id = %s RETURNING *;").format( + table=sql.Identifier(table), sets=sql.SQL(", ").join(sets) + ) + params.append(int(record_id)) + + with connect() as conn: + with conn.cursor() as cur: + cur.execute(q, params) + row = cur.fetchone() + conn.commit() + return row + + +def delete_record(record_id: int) -> bool: + table = _table_name() + q = sql.SQL("DELETE FROM {table} WHERE id = %s;").format(table=sql.Identifier(table)) + with connect() as conn: + with conn.cursor() as cur: + cur.execute(q, (int(record_id),)) + deleted = cur.rowcount > 0 + conn.commit() + return deleted + diff --git a/app/main.py b/app/main.py new file mode 100644 index 0000000..9abe7aa --- /dev/null +++ b/app/main.py @@ -0,0 +1,484 @@ +from __future__ import annotations + +import dataclasses +import mimetypes +import os +import tempfile +import threading +import time +import re +from pathlib import Path +from typing import Any +from uuid import uuid4 + +from dotenv import load_dotenv +from fastapi import FastAPI, File, Form, HTTPException, UploadFile +from fastapi.responses import HTMLResponse +from fastapi.staticfiles import StaticFiles +from pydantic import BaseModel + +from . 
import db +from .stt import transcribe_file, transcribe_iter + + +load_dotenv() + +APP_ROOT = Path(__file__).resolve().parent +PROJECT_ROOT = APP_ROOT.parent +STATIC_DIR = APP_ROOT / "static" +UPLOAD_DIR = PROJECT_ROOT / "resources" / "uploads" + +ALLOWED_EXTS = {".mp3", ".m4a", ".wav", ".mp4", ".aac", ".ogg", ".flac", ".webm"} +ALLOWED_MIME_PREFIXES = ("audio/",) +ALLOWED_MIMES = { + "video/mp4", # m4a가 video/mp4로 인식되는 경우가 흔함 + "application/octet-stream", # 일부 브라우저/OS 조합 +} + +MAX_UPLOAD_MB = int(os.getenv("APP_MAX_UPLOAD_MB", "200")) +MAX_UPLOAD_BYTES = MAX_UPLOAD_MB * 1024 * 1024 + +JOB_TTL_SEC = int(os.getenv("APP_JOB_TTL_SEC", "3600")) +DEFAULT_AUTHOR_ID = os.getenv("APP_DEFAULT_AUTHOR_ID", "dosangyoon@gmail.com").strip() or "dosangyoon@gmail.com" + + +app = FastAPI(title="Web STT") +app.mount("/static", StaticFiles(directory=str(STATIC_DIR)), name="static") + + +@app.on_event("startup") +def _startup() -> None: + # .env 기반으로 DB 테이블 자동 생성 + db.init_db() + UPLOAD_DIR.mkdir(parents=True, exist_ok=True) + + +@dataclasses.dataclass +class _Job: + job_id: str + filename: str + tmp_path: str + language: str | None + vad_filter: bool + beam_size: int + author_id: str + language_requested: str | None + status: str = "queued" # queued|running|completed|failed|cancelled + progress: float | None = 0.0 + text: str = "" + segments: list[dict[str, Any]] = dataclasses.field(default_factory=list) + detected_language: str | None = None + language_probability: float | None = None + duration_sec: float | None = None + error: str | None = None + created_at: float = dataclasses.field(default_factory=time.time) + updated_at: float = dataclasses.field(default_factory=time.time) + cancel_event: threading.Event = dataclasses.field(default_factory=threading.Event, repr=False) + + +_JOBS: dict[str, _Job] = {} +_JOBS_LOCK = threading.Lock() + + +def _cleanup_jobs(now: float | None = None) -> None: + now = time.time() if now is None else now + to_delete: list[str] = [] + with _JOBS_LOCK: + for job_id, job in _JOBS.items(): + if job.status in ("running", "queued"): + continue + if now - job.updated_at > JOB_TTL_SEC: + to_delete.append(job_id) + for job_id in to_delete: + job = _JOBS.pop(job_id, None) + if job is None: + continue + try: + os.remove(job.tmp_path) + except OSError: + pass + + +def _job_public(job: _Job) -> dict[str, Any]: + return { + "job_id": job.job_id, + "filename": job.filename, + "status": job.status, + "progress": job.progress, + "text": job.text, + "segments": job.segments, + "detected_language": job.detected_language, + "language_probability": job.language_probability, + "duration_sec": job.duration_sec, + "error": job.error, + "created_at": job.created_at, + "updated_at": job.updated_at, + } + + +@app.get("/", response_class=HTMLResponse) +def index() -> HTMLResponse: + index_path = STATIC_DIR / "index.html" + return HTMLResponse(index_path.read_text(encoding="utf-8")) + + +@app.post("/api/jobs") +async def api_create_job( + file: UploadFile = File(...), + language: str = Form(default="ko"), + vad_filter: bool = Form(default=True), + beam_size: int = Form(default=5), + author_id: str = Form(default=DEFAULT_AUTHOR_ID), +) -> dict[str, Any]: + _cleanup_jobs() + _validate_upload(file) + job_id = str(uuid4()) + saved_path = await _save_upload(file, file_id=job_id) + + lang = language.strip().lower() + if lang in ("", "auto"): + lang = "" + + job = _Job( + job_id=job_id, + filename=file.filename, + tmp_path=saved_path, + language=(lang or None), + vad_filter=bool(vad_filter), + 
beam_size=int(beam_size), + author_id=(author_id.strip() or DEFAULT_AUTHOR_ID), + language_requested=(language.strip() or None), + status="queued", + ) + + with _JOBS_LOCK: + _JOBS[job_id] = job + + threading.Thread(target=_run_job, args=(job_id,), daemon=True).start() + return {"job_id": job_id} + + +@app.get("/api/jobs/{job_id}") +def api_get_job(job_id: str) -> dict[str, Any]: + _cleanup_jobs() + with _JOBS_LOCK: + job = _JOBS.get(job_id) + if job is None: + raise HTTPException(status_code=404, detail="job not found") + job.updated_at = time.time() + return _job_public(job) + + +@app.post("/api/jobs/{job_id}/cancel") +def api_cancel_job(job_id: str) -> dict[str, Any]: + with _JOBS_LOCK: + job = _JOBS.get(job_id) + if job is None: + raise HTTPException(status_code=404, detail="job not found") + if job.status in ("completed", "failed", "cancelled"): + return _job_public(job) + job.cancel_event.set() + job.updated_at = time.time() + return _job_public(job) + + +@app.post("/api/transcribe") +async def api_transcribe( + file: UploadFile = File(...), + language: str = Form(default="ko"), + vad_filter: bool = Form(default=True), + beam_size: int = Form(default=5), + author_id: str = Form(default=DEFAULT_AUTHOR_ID), +) -> dict[str, Any]: + _validate_upload(file) + + try: + file_id = str(uuid4()) + saved_path = await _save_upload(file, file_id=file_id) + lang = language.strip().lower() + if lang in ("", "auto"): + lang = "" + result = transcribe_file( + saved_path, + language=(lang or None), + vad_filter=bool(vad_filter), + beam_size=int(beam_size), + ) + # 단발성 API도 DB 저장 + try: + db.insert_record( + author_id=(author_id.strip() or DEFAULT_AUTHOR_ID), + filename=file.filename, + language_requested=(language.strip() or None), + detected_language=result.get("detected_language"), + language_probability=result.get("language_probability"), + duration_sec=result.get("duration_sec"), + status="completed", + text=result.get("text") or "", + segments=result.get("segments") or [], + ) + except Exception: + pass + return result + finally: + # 업로드 파일은 resources/uploads 아래에 보관 (삭제하지 않음) + pass + + +@app.get("/healthz") +def healthz() -> dict[str, str]: + return {"status": "ok"} + + +@app.get("/api/records") +def api_list_records(limit: int = 50, offset: int = 0, author_id: str | None = None, q: str | None = None) -> dict[str, Any]: + return db.list_records(limit=limit, offset=offset, author_id=author_id, q=q) + + +@app.get("/api/records/{record_id}") +def api_get_record(record_id: int) -> dict[str, Any]: + row = db.get_record(int(record_id)) + if row is None: + raise HTTPException(status_code=404, detail="record not found") + return row + + +class _UpdateRecordIn(BaseModel): + author_id: str | None = None + text: str | None = None + status: str | None = None + + +@app.put("/api/records/{record_id}") +def api_update_record(record_id: int, payload: _UpdateRecordIn) -> dict[str, Any]: + author_id = payload.author_id + if author_id is not None: + author_id = author_id.strip() + if not author_id: + raise HTTPException(status_code=400, detail="author_id는 비울 수 없습니다.") + + row = db.update_record(int(record_id), author_id=author_id, text=payload.text, status=payload.status) + if row is None: + raise HTTPException(status_code=404, detail="record not found") + return row + + +@app.delete("/api/records/{record_id}") +def api_delete_record(record_id: int) -> dict[str, Any]: + ok = db.delete_record(int(record_id)) + if not ok: + raise HTTPException(status_code=404, detail="record not found") + return {"deleted": True} + + 
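+
+# NOTE: usage sketch for the job endpoints above (illustrative only, not part of the app).
+# Assumes a local server on 127.0.0.1:8025 and the third-party `requests` package, which is
+# not in requirements.txt; "sample.m4a" is a hypothetical input file.
+#
+#   import time
+#   import requests
+#
+#   with open("sample.m4a", "rb") as f:
+#       r = requests.post(
+#           "http://127.0.0.1:8025/api/jobs",
+#           files={"file": ("sample.m4a", f, "audio/m4a")},
+#           data={"language": "ko", "vad_filter": "true", "beam_size": "5"},
+#       )
+#   job_id = r.json()["job_id"]
+#   while True:
+#       job = requests.get(f"http://127.0.0.1:8025/api/jobs/{job_id}").json()
+#       if job["status"] in ("completed", "failed", "cancelled"):
+#           break
+#       time.sleep(1)  # poll while the background thread transcribes
+#   print(job["detected_language"], job["text"])
+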
+def _validate_upload(file: UploadFile) -> None: + if not file or not file.filename: + raise HTTPException(status_code=400, detail="파일이 필요합니다.") + + ext = Path(file.filename).suffix.lower() + if ext not in ALLOWED_EXTS: + raise HTTPException( + status_code=415, + detail=f"허용되지 않는 확장자입니다: {ext}. 허용: {sorted(ALLOWED_EXTS)}", + ) + + content_type = (file.content_type or "").lower().strip() + guessed, _ = mimetypes.guess_type(file.filename) + guessed = (guessed or "").lower() + + def ok_mime(m: str) -> bool: + return (m.startswith(ALLOWED_MIME_PREFIXES)) or (m in ALLOWED_MIMES) + + if content_type and not ok_mime(content_type) and guessed and not ok_mime(guessed): + raise HTTPException( + status_code=415, + detail=f"오디오 파일만 업로드 가능합니다. content-type={content_type}, guessed={guessed}", + ) + + +_FILENAME_SAFE_RE = re.compile(r"[^A-Za-z0-9._-]+") + + +def _safe_filename(name: str) -> str: + base = Path(name).name # path traversal 방지 + base = base.strip().replace(" ", "_") + base = _FILENAME_SAFE_RE.sub("_", base) + if not base: + return "upload.bin" + if len(base) > 120: + stem = Path(base).stem[:100] + suf = Path(base).suffix[:20] + base = f"{stem}{suf}" + return base + + +async def _save_upload(file: UploadFile, *, file_id: str) -> str: + UPLOAD_DIR.mkdir(parents=True, exist_ok=True) + safe = _safe_filename(file.filename or "upload.bin") + out_path = UPLOAD_DIR / f"{file_id}_{safe}" + tmp_path = str(out_path) + with open(tmp_path, "wb") as tmp: + total = 0 + while True: + chunk = await file.read(1024 * 1024) + if not chunk: + break + total += len(chunk) + if total > MAX_UPLOAD_BYTES: + try: + os.remove(tmp_path) + except OSError: + pass + raise HTTPException( + status_code=413, + detail=f"파일이 너무 큽니다. 최대 {MAX_UPLOAD_MB}MB 까지 업로드 가능합니다.", + ) + tmp.write(chunk) + return tmp_path + + +def _run_job(job_id: str) -> None: + with _JOBS_LOCK: + job = _JOBS.get(job_id) + if job is None: + return + job.status = "running" + job.progress = 0.0 + job.updated_at = time.time() + + tmp_path: str | None = None + cancelled = False + try: + with _JOBS_LOCK: + job = _JOBS.get(job_id) + if job is None: + return + tmp_path = job.tmp_path + language = job.language + vad_filter = job.vad_filter + beam_size = job.beam_size + author_id = job.author_id + language_requested = job.language_requested + filename = job.filename + + segments_iter, info = transcribe_iter( + tmp_path, + language=language, + vad_filter=vad_filter, + beam_size=beam_size, + ) + + duration = getattr(info, "duration", None) + detected_language = getattr(info, "language", None) + language_probability = getattr(info, "language_probability", None) + + with _JOBS_LOCK: + job = _JOBS.get(job_id) + if job is None: + return + job.duration_sec = duration + job.detected_language = detected_language + job.language_probability = language_probability + job.updated_at = time.time() + + texts: list[str] = [] + for s in segments_iter: + with _JOBS_LOCK: + job = _JOBS.get(job_id) + if job is None: + return + if job.cancel_event.is_set(): + job.status = "cancelled" + job.updated_at = time.time() + cancelled = True + break + + seg_text = (getattr(s, "text", "") or "").strip() + if not seg_text: + continue + + seg = { + "start": float(getattr(s, "start", 0.0)), + "end": float(getattr(s, "end", 0.0)), + "text": seg_text, + } + texts.append(seg_text) + + with _JOBS_LOCK: + job = _JOBS.get(job_id) + if job is None: + return + if job.cancel_event.is_set(): + job.status = "cancelled" + job.updated_at = time.time() + cancelled = True + break + job.segments.append(seg) + 
job.text = "\n".join(texts).strip() + if job.duration_sec and job.duration_sec > 0: + job.progress = max(0.0, min(0.999, float(seg["end"]) / float(job.duration_sec))) + else: + job.progress = None + job.updated_at = time.time() + + with _JOBS_LOCK: + job = _JOBS.get(job_id) + if job is None: + return + if cancelled or job.cancel_event.is_set(): + job.status = "cancelled" + else: + job.status = "completed" + job.progress = 1.0 + job.updated_at = time.time() + + # DB 저장 (완료/취소 모두 저장) + try: + with _JOBS_LOCK: + job = _JOBS.get(job_id) + if job is None: + return + db.insert_record( + author_id=author_id, + filename=filename, + language_requested=language_requested, + detected_language=job.detected_language, + language_probability=job.language_probability, + duration_sec=job.duration_sec, + status=job.status, + text=job.text, + segments=job.segments, + ) + except Exception: + pass + + except Exception as e: + with _JOBS_LOCK: + job = _JOBS.get(job_id) + if job is None: + return + job.status = "failed" + job.error = str(e) + job.updated_at = time.time() + try: + with _JOBS_LOCK: + job = _JOBS.get(job_id) + if job is None: + return + db.insert_record( + author_id=getattr(job, "author_id", DEFAULT_AUTHOR_ID), + filename=getattr(job, "filename", None), + language_requested=getattr(job, "language_requested", None), + detected_language=job.detected_language, + language_probability=job.language_probability, + duration_sec=job.duration_sec, + status="failed", + text=job.text, + segments=job.segments, + ) + except Exception: + pass + finally: + # 업로드 파일은 resources/uploads 아래에 보관 (삭제하지 않음) + pass + diff --git a/app/static/index.html b/app/static/index.html new file mode 100644 index 0000000..7c725da --- /dev/null +++ b/app/static/index.html @@ -0,0 +1,895 @@ + + + + + + Web STT + + + +
+<!--
+  [index.html markup omitted: only the page's visible strings survived this listing.]
+  The 895-line file is a single-page UI with:
+    - header "Web STT", subtitle "upload an mp3/m4a audio file → convert to text"
+    - a server status indicator ("checking server status…")
+    - a file picker plus controls for the transcription options the API accepts
+      (language, VAD filter, beam size, author id)
+    - a hint listing the allowed formats (mp3, m4a, wav, mp4, aac, ogg, flac, webm)
+      and a note that the first run may take a while because the Whisper model is downloaded
+    - a progress indicator (initially "0%") and a result panel (initially "waiting for result")
+      with a .txt download action
+    - a records table (columns: id / file / status / author / created / actions) and a
+      record detail panel (initially "no record selected")
+-->
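+<!-- Illustrative only: a minimal, non-executing sketch of the upload → poll flow the page
+     implements, based on the /api/jobs endpoints in app/main.py. It is kept inside an HTML
+     comment because the page's real markup and script are omitted above. -->
+<!--
+<script>
+  async function transcribe(file) {
+    const form = new FormData();
+    form.append("file", file);
+    form.append("language", "ko");            // "auto" lets the server detect the language
+    const res = await fetch("/api/jobs", { method: "POST", body: form });
+    const { job_id } = await res.json();
+    while (true) {                             // poll until the background job finishes
+      const job = await (await fetch(`/api/jobs/${job_id}`)).json();
+      if (["completed", "failed", "cancelled"].includes(job.status)) return job;
+      await new Promise(r => setTimeout(r, 1000));
+    }
+  }
+</script>
+-->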
+ + + + + diff --git a/app/stt.py b/app/stt.py new file mode 100644 index 0000000..1f60644 --- /dev/null +++ b/app/stt.py @@ -0,0 +1,86 @@ +from __future__ import annotations + +import os +from dataclasses import dataclass +from typing import Any, Iterable, Tuple + +from faster_whisper import WhisperModel + + +@dataclass(frozen=True) +class SegmentOut: + start: float + end: float + text: str + + +_MODEL: WhisperModel | None = None + + +def _get_model() -> WhisperModel: + global _MODEL + if _MODEL is not None: + return _MODEL + + model_name = os.getenv("APP_WHISPER_MODEL", "small") + device = os.getenv("APP_WHISPER_DEVICE", "cpu") + compute_type = os.getenv("APP_WHISPER_COMPUTE_TYPE", "int8") + + # WhisperModel download/cache handled by faster-whisper internally. + _MODEL = WhisperModel(model_name, device=device, compute_type=compute_type) + return _MODEL + + +def transcribe_iter( + audio_path: str, + *, + language: str | None = None, + vad_filter: bool = True, + beam_size: int = 5, +) -> Tuple[Iterable[Any], Any]: + model = _get_model() + segments_iter, info = model.transcribe( + audio_path, + language=language, + vad_filter=vad_filter, + beam_size=beam_size, + ) + return segments_iter, info + + +def transcribe_file( + audio_path: str, + *, + language: str | None = None, + vad_filter: bool = True, + beam_size: int = 5, +) -> dict[str, Any]: + segments_iter, info = transcribe_iter( + audio_path, + language=language, + vad_filter=vad_filter, + beam_size=beam_size, + ) + + segments: list[SegmentOut] = [] + texts: list[str] = [] + for s in _iter_segments(segments_iter): + seg = SegmentOut(start=float(s.start), end=float(s.end), text=(s.text or "").strip()) + if seg.text: + segments.append(seg) + texts.append(seg.text) + + full_text = "\n".join(texts).strip() + return { + "text": full_text, + "segments": [seg.__dict__ for seg in segments], + "detected_language": getattr(info, "language", None), + "language_probability": getattr(info, "language_probability", None), + "duration_sec": getattr(info, "duration", None), + } + + +def _iter_segments(segments_iter: Iterable[Any]) -> Iterable[Any]: + for s in segments_iter: + yield s + diff --git a/environment.yml b/environment.yml new file mode 100644 index 0000000..69e6e09 --- /dev/null +++ b/environment.yml @@ -0,0 +1,10 @@ +name: ncue +channels: + - conda-forge +dependencies: + - python>=3.10,<3.13 + - pip + - ffmpeg + - pip: + - -r requirements.txt + diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..dfc83d6 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,7 @@ +fastapi +uvicorn[standard] +python-multipart +pydantic +faster-whisper +psycopg[binary] +python-dotenv diff --git a/resources/uploads/.gitkeep b/resources/uploads/.gitkeep new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/resources/uploads/.gitkeep @@ -0,0 +1 @@ + diff --git a/run.sh b/run.sh new file mode 100755 index 0000000..be75fe6 --- /dev/null +++ b/run.sh @@ -0,0 +1,33 @@ +#!/usr/bin/env bash +set -euo pipefail + +if [[ -z "${BASH_VERSION:-}" ]]; then + echo "ERROR: bash로 실행하세요. 예) ./run.sh 또는 bash run.sh" >&2 + exit 1 +fi + +cd /home/dsyoon/workspace/stt + +CONDA_BASE="/home/dsyoon/workspace/miniconda3" +source "${CONDA_BASE}/bin/activate" ncue + +# Torch/ctranslate2 런타임 로딩에 conda lib 경로 필요할 수 있음 +export LD_LIBRARY_PATH="${CONDA_PREFIX}/lib:${LD_LIBRARY_PATH:-}" + +PORT="${PORT:-8025}" +RELOAD="${RELOAD:-0}" + +if lsof -ti tcp:"${PORT}" >/dev/null 2>&1; then + echo "Stopping existing server on port ${PORT}..." 
+ lsof -ti tcp:"${PORT}" | xargs -r kill -9 + sleep 1 +fi + +UVICORN_ARGS=("--host" "127.0.0.1" "--port" "${PORT}") +if [[ "${RELOAD}" == "1" ]]; then + UVICORN_ARGS+=("--reload") +fi + +echo "Starting uvicorn on 127.0.0.1:${PORT} (reload=${RELOAD})" +nohup uvicorn app.main:app "${UVICORN_ARGS[@]}" > server.log 2>&1 & +echo "Server started (PID: $!). Logs: server.log" diff --git a/sql/create_ncue_stt.sql b/sql/create_ncue_stt.sql new file mode 100644 index 0000000..ffbc0d9 --- /dev/null +++ b/sql/create_ncue_stt.sql @@ -0,0 +1,21 @@ +-- ncue_stt 테이블 생성 (PostgreSQL) +-- 주의: 테이블명은 .env의 TABLE 값과 동일해야 합니다. + +CREATE TABLE IF NOT EXISTS ncue_stt ( + id BIGSERIAL PRIMARY KEY, + author_id TEXT NOT NULL, + filename TEXT, + language_requested TEXT, + detected_language TEXT, + language_probability DOUBLE PRECISION, + duration_sec DOUBLE PRECISION, + status TEXT NOT NULL DEFAULT 'completed', + text TEXT NOT NULL DEFAULT '', + segments JSONB NOT NULL DEFAULT '[]'::jsonb, + created_at TIMESTAMPTZ NOT NULL DEFAULT now(), + updated_at TIMESTAMPTZ NOT NULL DEFAULT now() +); + +CREATE INDEX IF NOT EXISTS ncue_stt_author_id_idx ON ncue_stt(author_id); +CREATE INDEX IF NOT EXISTS ncue_stt_created_at_idx ON ncue_stt(created_at DESC); +
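
Reference: `app/main.py` and `app/db.py` load their settings from a `.env` file via python-dotenv. A sketch of the variables they read is below; the DB values and author id are placeholders, the remaining values are the defaults used in the code, and `PORT`/`RELOAD` are read by `run.sh` from the shell environment rather than from `.env`.

```bash
# PostgreSQL connection (app/db.py)
DB_HOST=localhost
DB_PORT=5432
DB_NAME=mydb
DB_USER=myuser
DB_PASSWORD=changeme
# optional: DB_SSLMODE=require
# must match sql/create_ncue_stt.sql
TABLE=ncue_stt

# Whisper / upload settings (app/main.py, app/stt.py)
APP_WHISPER_MODEL=small
APP_WHISPER_DEVICE=cpu
APP_WHISPER_COMPUTE_TYPE=int8
APP_MAX_UPLOAD_MB=200
APP_JOB_TTL_SEC=3600
APP_DEFAULT_AUTHOR_ID=user@example.com
```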