Initial commit after re-install

This commit is contained in:
2026-02-25 19:07:56 +09:00
commit 7adaa73102
12 changed files with 1848 additions and 0 deletions

2
app/__init__.py Normal file
View File

@@ -0,0 +1,2 @@
__all__ = []

229
app/db.py Normal file
View File

@@ -0,0 +1,229 @@
from __future__ import annotations
import os
import re
from typing import Any, Iterable
import psycopg
from psycopg import sql
from psycopg.rows import dict_row
from psycopg.types.json import Json
_IDENT_RE = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*$")


def _table_name() -> str:
    """Return the table name configured through the ``TABLE`` environment variable.

    Falls back to ``ncue_stt`` when the variable is unset. The whitespace-trimmed
    value has to match ``_IDENT_RE`` (ASCII letters, digits and underscores, not
    starting with a digit).

    Raises:
        RuntimeError: If the configured value does not match ``_IDENT_RE``.
    """
    configured = os.getenv("TABLE", "ncue_stt").strip()
    if _IDENT_RE.match(configured) is None:
        raise RuntimeError("TABLE 환경변수가 올바르지 않습니다.")
    return configured
def _conninfo() -> str:
    """Build a libpq keyword/value connection string from ``DB_*`` environment variables.

    Reads ``DB_HOST``, ``DB_PORT`` (default ``5432``), ``DB_NAME``, ``DB_USER``,
    ``DB_PASSWORD`` and the optional ``DB_SSLMODE``. Every value is
    whitespace-trimmed before use.

    Values that are empty or contain whitespace, a single quote or a backslash
    are single-quoted and backslash-escaped, so that such a value (typically
    the password) can neither break the string nor smuggle in extra parameters.
    Plain values are emitted unquoted, exactly as before.

    Returns:
        The space-separated ``key=value`` connection string.

    Raises:
        RuntimeError: If any of host, database name, user or password is empty.
    """

    def _quote(value: str) -> str:
        """Quote *value* for a libpq keyword/value string when required."""
        if value and not any(ch.isspace() or ch in "'\\" for ch in value):
            return value
        # Backslashes first, otherwise the escapes added for quotes get doubled.
        escaped = value.replace("\\", "\\\\").replace("'", "\\'")
        return f"'{escaped}'"

    host = os.getenv("DB_HOST", "").strip()
    # An explicitly empty DB_PORT would otherwise emit a dangling "port=".
    port = os.getenv("DB_PORT", "5432").strip() or "5432"
    dbname = os.getenv("DB_NAME", "").strip()
    user = os.getenv("DB_USER", "").strip()
    password = os.getenv("DB_PASSWORD", "").strip()
    sslmode = os.getenv("DB_SSLMODE", "").strip()  # optional

    required = (
        ("DB_HOST", host),
        ("DB_NAME", dbname),
        ("DB_USER", user),
        ("DB_PASSWORD", password),
    )
    missing = [key for key, value in required if not value]
    if missing:
        raise RuntimeError(f"DB 환경변수 누락: {', '.join(missing)}")

    settings = [
        ("host", host),
        ("port", port),
        ("dbname", dbname),
        ("user", user),
        ("password", password),
    ]
    if sslmode:
        settings.append(("sslmode", sslmode))
    return " ".join(f"{key}={_quote(value)}" for key, value in settings)
def connect() -> psycopg.Connection[Any]:
    """Open a new database connection configured from the environment.

    The connection uses ``dict_row`` as its row factory and is opened with
    ``connect_timeout=5``.
    """
    dsn = _conninfo()
    return psycopg.connect(dsn, connect_timeout=5, row_factory=dict_row)
def init_db() -> None:
    """Create the transcript table and its two indexes if they do not exist yet.

    The table name comes from ``_table_name()``; the index names are derived
    from it (``<table>_author_id_idx`` and ``<table>_created_at_idx``).
    """
    name = _table_name()
    table_ident = sql.Identifier(name)

    create_table = sql.SQL(
        """
        CREATE TABLE IF NOT EXISTS {table} (
        id BIGSERIAL PRIMARY KEY,
        author_id TEXT NOT NULL,
        filename TEXT,
        language_requested TEXT,
        detected_language TEXT,
        language_probability DOUBLE PRECISION,
        duration_sec DOUBLE PRECISION,
        status TEXT NOT NULL DEFAULT 'completed',
        text TEXT NOT NULL DEFAULT '',
        segments JSONB NOT NULL DEFAULT '[]'::jsonb,
        created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
        updated_at TIMESTAMPTZ NOT NULL DEFAULT now()
        );
        """
    ).format(table=table_ident)

    index_specs = (
        ("author_id_idx", "CREATE INDEX IF NOT EXISTS {idx} ON {table}(author_id);"),
        ("created_at_idx", "CREATE INDEX IF NOT EXISTS {idx} ON {table}(created_at DESC);"),
    )
    statements = [create_table]
    for suffix, template in index_specs:
        statements.append(
            sql.SQL(template).format(idx=sql.Identifier(f"{name}_{suffix}"), table=table_ident)
        )

    with connect() as conn:
        with conn.cursor() as cur:
            # Table first, then the indexes that depend on it.
            for statement in statements:
                cur.execute(statement)
        conn.commit()
def insert_record(
    *,
    author_id: str,
    filename: str | None,
    language_requested: str | None,
    detected_language: str | None,
    language_probability: float | None,
    duration_sec: float | None,
    status: str,
    text: str,
    segments: list[dict[str, Any]],
) -> int:
    """Insert one transcription row and return its generated ``id``.

    A falsy ``text`` is stored as an empty string and falsy ``segments`` as an
    empty JSON array; ``segments`` is wrapped in ``Json`` for the JSONB column.
    """
    statement = sql.SQL(
        """
        INSERT INTO {table}
        (author_id, filename, language_requested, detected_language, language_probability, duration_sec, status, text, segments)
        VALUES
        (%s, %s, %s, %s, %s, %s, %s, %s, %s)
        RETURNING id;
        """
    ).format(table=sql.Identifier(_table_name()))
    # Order must match the column list of the INSERT above.
    values = (
        author_id,
        filename,
        language_requested,
        detected_language,
        language_probability,
        duration_sec,
        status,
        text or "",
        Json(segments or []),
    )
    with connect() as conn:
        with conn.cursor() as cur:
            cur.execute(statement, values)
            inserted = cur.fetchone()
        conn.commit()
    return int(inserted["id"])
def list_records(*, limit: int = 50, offset: int = 0, author_id: str | None = None, q: str | None = None) -> dict[str, Any]:
    """Return one page of records (without text/segments) plus the total match count.

    Args:
        limit: Page size; clamped to the range 1..200.
        offset: Number of rows to skip; negative values are treated as 0.
        author_id: When truthy, only rows with exactly this ``author_id``.
        q: When truthy, a case-insensitive substring that must occur in
            ``filename`` or ``text``. The value is matched literally: ``%``,
            ``_`` and ``\\`` are escaped so they do not act as LIKE wildcards.

    Returns:
        ``{"total": <matching row count>, "items": <rows of the page>}``,
        newest first.
    """
    table = sql.Identifier(_table_name())
    limit = max(1, min(int(limit), 200))
    offset = max(0, int(offset))

    conditions: list[sql.Composable] = []
    params: list[Any] = []
    if author_id:
        conditions.append(sql.SQL("author_id = %s"))
        params.append(author_id)
    if q:
        # Backslash is PostgreSQL's default LIKE escape character; escape it
        # first so the escapes added for % and _ are not doubled.
        escaped = q.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
        pattern = f"%{escaped}%"
        conditions.append(sql.SQL("(filename ILIKE %s OR text ILIKE %s)"))
        params.extend([pattern, pattern])

    where_sql = sql.SQL("")
    if conditions:
        where_sql = sql.SQL("WHERE ") + sql.SQL(" AND ").join(conditions)

    count_q = sql.SQL("SELECT count(*)::bigint AS cnt FROM {table} {where};").format(
        table=table, where=where_sql
    )
    # "id DESC" is a tie-breaker: rows sharing a created_at value would
    # otherwise have no defined order and could repeat or vanish across pages.
    list_q = sql.SQL(
        """
        SELECT id, author_id, filename, language_requested, detected_language, duration_sec, status, created_at, updated_at
        FROM {table}
        {where}
        ORDER BY created_at DESC, id DESC
        LIMIT %s OFFSET %s;
        """
    ).format(table=table, where=where_sql)

    with connect() as conn:
        with conn.cursor() as cur:
            cur.execute(count_q, params)
            total = int(cur.fetchone()["cnt"])
            cur.execute(list_q, params + [limit, offset])
            rows = cur.fetchall()
    return {"total": total, "items": rows}
def get_record(record_id: int) -> dict[str, Any] | None:
    """Fetch the full row with primary key *record_id*, or ``None`` if there is none."""
    statement = sql.SQL("SELECT * FROM {table} WHERE id = %s;").format(
        table=sql.Identifier(_table_name())
    )
    with connect() as conn, conn.cursor() as cur:
        cur.execute(statement, (int(record_id),))
        found = cur.fetchone()
    return found
def update_record(
    record_id: int,
    *,
    author_id: str | None = None,
    text: str | None = None,
    status: str | None = None,
) -> dict[str, Any] | None:
    """Update the given columns of one record and return the resulting row.

    Only arguments that are not ``None`` are written. When nothing is to be
    written the current row is returned unchanged via ``get_record``; otherwise
    ``updated_at`` is set to ``now()`` as well.

    Returns:
        The row after the update, or ``None`` when no row has that id.
    """
    table = _table_name()
    requested = (
        ("author_id = %s", author_id),
        ("text = %s", text),
        ("status = %s", status),
    )
    changes = [(fragment, value) for fragment, value in requested if value is not None]
    if not changes:
        return get_record(int(record_id))

    assignments = [sql.SQL(fragment) for fragment, _ in changes]
    assignments.append(sql.SQL("updated_at = now()"))
    statement = sql.SQL("UPDATE {table} SET {sets} WHERE id = %s RETURNING *;").format(
        table=sql.Identifier(table), sets=sql.SQL(", ").join(assignments)
    )
    # Placeholder values in SET order, followed by the id for the WHERE clause.
    params: list[Any] = [value for _, value in changes]
    params.append(int(record_id))

    with connect() as conn:
        with conn.cursor() as cur:
            cur.execute(statement, params)
            updated = cur.fetchone()
        conn.commit()
    return updated
def delete_record(record_id: int) -> bool:
    """Delete the record with primary key *record_id*.

    Returns:
        ``True`` when a row was deleted, ``False`` when no row had that id.
    """
    statement = sql.SQL("DELETE FROM {table} WHERE id = %s;").format(
        table=sql.Identifier(_table_name())
    )
    with connect() as conn:
        with conn.cursor() as cur:
            cur.execute(statement, (int(record_id),))
            removed = cur.rowcount
        conn.commit()
    return removed > 0

484
app/main.py Normal file
View File

@@ -0,0 +1,484 @@
from __future__ import annotations
import dataclasses
import mimetypes
import os
import tempfile
import threading
import time
import re
from pathlib import Path
from typing import Any
from uuid import uuid4
from dotenv import load_dotenv
from fastapi import FastAPI, File, Form, HTTPException, UploadFile
from fastapi.responses import HTMLResponse
from fastapi.staticfiles import StaticFiles
from pydantic import BaseModel
from . import db
from .stt import transcribe_file, transcribe_iter
# Load variables from a .env file before the os.getenv() calls below run.
load_dotenv()
# Directory containing this module (the "app" package).
APP_ROOT = Path(__file__).resolve().parent
# Parent directory of the package.
PROJECT_ROOT = APP_ROOT.parent
# Static assets served under /static; index.html is read from here.
STATIC_DIR = APP_ROOT / "static"
# Uploaded audio files are written below this directory.
UPLOAD_DIR = PROJECT_ROOT / "resources" / "uploads"
# File extensions accepted for upload (compared lower-cased).
ALLOWED_EXTS = {".mp3", ".m4a", ".wav", ".mp4", ".aac", ".ogg", ".flac", ".webm"}
# Any MIME type starting with one of these prefixes is accepted.
ALLOWED_MIME_PREFIXES = ("audio/",)
# Additional exact MIME types that are accepted although they are not audio/*.
ALLOWED_MIMES = {
    "video/mp4",  # .m4a is commonly reported as video/mp4
    # .webm is an allowed extension, but both the browser and
    # mimetypes.guess_type() usually report it as video/webm; without this
    # entry such uploads were rejected with 415.
    "video/webm",
    "application/octet-stream",  # sent by some browser/OS combinations
}
# Upload size limit in megabytes (APP_MAX_UPLOAD_MB, default 200) and in bytes.
MAX_UPLOAD_MB = int(os.getenv("APP_MAX_UPLOAD_MB", "200"))
MAX_UPLOAD_BYTES = MAX_UPLOAD_MB * 1024 * 1024
# Seconds a finished job stays in memory after its last update (APP_JOB_TTL_SEC).
JOB_TTL_SEC = int(os.getenv("APP_JOB_TTL_SEC", "3600"))
# author_id used when the client sends none; an empty env value falls back to the literal.
DEFAULT_AUTHOR_ID = os.getenv("APP_DEFAULT_AUTHOR_ID", "dosangyoon@gmail.com").strip() or "dosangyoon@gmail.com"
app = FastAPI(title="Web STT")
app.mount("/static", StaticFiles(directory=str(STATIC_DIR)), name="static")
@app.on_event("startup")
def _startup() -> None:
    """Prepare the database table and the upload directory when the app starts."""
    # Create the DB table automatically, using the settings loaded from .env.
    db.init_db()
    UPLOAD_DIR.mkdir(parents=True, exist_ok=True)
@dataclasses.dataclass
class _Job:
    """In-memory state of one background transcription job."""

    job_id: str
    filename: str  # filename of the upload as sent by the client
    tmp_path: str  # path of the saved upload on disk
    language: str | None  # language passed to the transcriber; None for empty/"auto"
    vad_filter: bool
    beam_size: int
    author_id: str
    language_requested: str | None  # trimmed language value from the request, None if empty
    status: str = "queued"  # queued|running|completed|failed|cancelled
    progress: float | None = 0.0  # fraction 0..1; None while the audio duration is unknown
    text: str = ""  # transcript so far, one segment per line
    segments: list[dict[str, Any]] = dataclasses.field(default_factory=list)
    detected_language: str | None = None
    language_probability: float | None = None
    duration_sec: float | None = None
    error: str | None = None  # str() of the exception when the job failed
    created_at: float = dataclasses.field(default_factory=time.time)
    updated_at: float = dataclasses.field(default_factory=time.time)
    # Set by the cancel endpoint and checked by the worker between segments.
    cancel_event: threading.Event = dataclasses.field(default_factory=threading.Event, repr=False)
# Registry of all known jobs keyed by job id; access is guarded by _JOBS_LOCK.
_JOBS: dict[str, _Job] = {}
_JOBS_LOCK = threading.Lock()
def _cleanup_jobs(now: float | None = None) -> None:
    """Forget finished jobs that have not been updated for ``JOB_TTL_SEC`` seconds.

    Jobs that are still ``queued`` or ``running`` are never removed. For each
    removed job the file at ``tmp_path`` is deleted as well; a failure to
    delete it is ignored.

    Args:
        now: Reference timestamp in seconds; defaults to ``time.time()``.
    """
    reference = now if now is not None else time.time()
    with _JOBS_LOCK:
        expired = [
            key
            for key, candidate in _JOBS.items()
            if candidate.status not in ("running", "queued")
            and reference - candidate.updated_at > JOB_TTL_SEC
        ]
        for key in expired:
            stale = _JOBS.pop(key, None)
            if stale is None:
                continue
            # NOTE(review): this deletes the stored upload of an expired job,
            # while other comments in this module say uploads are kept --
            # confirm the intended retention policy.
            try:
                os.remove(stale.tmp_path)
            except OSError:
                pass
def _job_public(job: _Job) -> dict[str, Any]:
    """Return the subset of a job's state that is exposed through the API.

    Internal fields such as ``tmp_path`` and ``cancel_event`` are left out.
    """
    exposed = (
        "job_id",
        "filename",
        "status",
        "progress",
        "text",
        "segments",
        "detected_language",
        "language_probability",
        "duration_sec",
        "error",
        "created_at",
        "updated_at",
    )
    return {field: getattr(job, field) for field in exposed}
@app.get("/", response_class=HTMLResponse)
def index() -> HTMLResponse:
    """Serve the UI by returning the contents of ``index.html`` from the static directory."""
    html = (STATIC_DIR / "index.html").read_text(encoding="utf-8")
    return HTMLResponse(html)
@app.post("/api/jobs")
async def api_create_job(
    file: UploadFile = File(...),
    language: str = Form(default="ko"),
    vad_filter: bool = Form(default=True),
    beam_size: int = Form(default=5),
    author_id: str = Form(default=DEFAULT_AUTHOR_ID),
) -> dict[str, Any]:
    """Store the upload, register a queued job and start its worker thread.

    Returns:
        ``{"job_id": <id>}``; the job state can then be polled by id.
    """
    _cleanup_jobs()
    _validate_upload(file)

    new_id = str(uuid4())
    stored_path = await _save_upload(file, file_id=new_id)

    requested = language.strip()
    normalized = requested.lower()
    # An empty value or "auto" means: let the transcriber detect the language.
    model_language = None if normalized in ("", "auto") else normalized

    record = _Job(
        job_id=new_id,
        filename=file.filename,
        tmp_path=stored_path,
        language=model_language,
        vad_filter=bool(vad_filter),
        beam_size=int(beam_size),
        author_id=author_id.strip() or DEFAULT_AUTHOR_ID,
        language_requested=requested or None,
        status="queued",
    )
    with _JOBS_LOCK:
        _JOBS[new_id] = record

    worker = threading.Thread(target=_run_job, args=(new_id,), daemon=True)
    worker.start()
    return {"job_id": new_id}
@app.get("/api/jobs/{job_id}")
def api_get_job(job_id: str) -> dict[str, Any]:
    """Return the public state of a job; responds with 404 for an unknown id.

    Each successful lookup refreshes the job's ``updated_at`` timestamp.
    """
    _cleanup_jobs()
    with _JOBS_LOCK:
        found = _JOBS.get(job_id)
        if found is not None:
            found.updated_at = time.time()
            return _job_public(found)
    raise HTTPException(status_code=404, detail="job not found")
@app.post("/api/jobs/{job_id}/cancel")
def api_cancel_job(job_id: str) -> dict[str, Any]:
    """Request cancellation of a job and return its public state.

    Jobs that already finished (completed, failed or cancelled) are returned
    untouched. Responds with 404 for an unknown id.
    """
    with _JOBS_LOCK:
        target = _JOBS.get(job_id)
        if target is None:
            raise HTTPException(status_code=404, detail="job not found")
        finished = target.status in ("completed", "failed", "cancelled")
        if not finished:
            # The worker thread checks this event between segments.
            target.cancel_event.set()
            target.updated_at = time.time()
        return _job_public(target)
@app.post("/api/transcribe")
async def api_transcribe(
    file: UploadFile = File(...),
    language: str = Form(default="ko"),
    vad_filter: bool = Form(default=True),
    beam_size: int = Form(default=5),
    author_id: str = Form(default=DEFAULT_AUTHOR_ID),
) -> dict[str, Any]:
    """Transcribe an uploaded file in a single request and return the result.

    The upload is validated and saved, transcribed with ``transcribe_file``
    and the result is stored in the database on a best-effort basis: a
    database failure is logged but does not fail the request. The uploaded
    file is kept under the upload directory (it is not deleted).

    The blocking transcription and database calls run in the thread pool so
    that the event loop stays responsive for other requests meanwhile.
    """
    import logging

    from fastapi.concurrency import run_in_threadpool

    _validate_upload(file)
    saved_path = await _save_upload(file, file_id=str(uuid4()))

    requested = language.strip()
    lang = requested.lower()
    if lang in ("", "auto"):
        # Empty/"auto" lets the transcriber detect the language.
        lang = ""

    result = await run_in_threadpool(
        transcribe_file,
        saved_path,
        language=(lang or None),
        vad_filter=bool(vad_filter),
        beam_size=int(beam_size),
    )

    # The one-shot API stores its result in the database as well.
    try:
        await run_in_threadpool(
            db.insert_record,
            author_id=(author_id.strip() or DEFAULT_AUTHOR_ID),
            filename=file.filename,
            language_requested=(requested or None),
            detected_language=result.get("detected_language"),
            language_probability=result.get("language_probability"),
            duration_sec=result.get("duration_sec"),
            status="completed",
            text=result.get("text") or "",
            segments=result.get("segments") or [],
        )
    except Exception:
        # Best effort: the caller still gets the transcript, but the failure
        # must be visible in the logs instead of vanishing silently.
        logging.getLogger(__name__).exception(
            "failed to store transcription of %r in the database", file.filename
        )
    return result
@app.get("/healthz")
def healthz() -> dict[str, str]:
    """Liveness probe: always answers with ``{"status": "ok"}``."""
    return {"status": "ok"}
@app.get("/api/records")
def api_list_records(limit: int = 50, offset: int = 0, author_id: str | None = None, q: str | None = None) -> dict[str, Any]:
    """List stored records; all query parameters are passed through to ``db.list_records``."""
    return db.list_records(limit=limit, offset=offset, author_id=author_id, q=q)
@app.get("/api/records/{record_id}")
def api_get_record(record_id: int) -> dict[str, Any]:
    """Return one stored record; responds with 404 when the id is unknown."""
    found = db.get_record(int(record_id))
    if found is not None:
        return found
    raise HTTPException(status_code=404, detail="record not found")
class _UpdateRecordIn(BaseModel):
    """Request body of the record update endpoint; ``None`` fields are left unchanged."""

    author_id: str | None = None
    text: str | None = None
    status: str | None = None
@app.put("/api/records/{record_id}")
def api_update_record(record_id: int, payload: _UpdateRecordIn) -> dict[str, Any]:
    """Update a stored record and return the row after the update.

    Responds with 400 when ``author_id`` is given but blank, and with 404 when
    the record does not exist.
    """
    new_author = payload.author_id
    if new_author is not None:
        new_author = new_author.strip()
        if new_author == "":
            raise HTTPException(status_code=400, detail="author_id는 비울 수 없습니다.")

    updated = db.update_record(
        int(record_id),
        author_id=new_author,
        text=payload.text,
        status=payload.status,
    )
    if updated is not None:
        return updated
    raise HTTPException(status_code=404, detail="record not found")
@app.delete("/api/records/{record_id}")
def api_delete_record(record_id: int) -> dict[str, Any]:
    """Delete a stored record; responds with 404 when the id is unknown."""
    if db.delete_record(int(record_id)):
        return {"deleted": True}
    raise HTTPException(status_code=404, detail="record not found")
def _validate_upload(file: UploadFile) -> None:
    """Reject uploads that do not look like a supported audio file.

    Raises:
        HTTPException: 400 when no file or filename is present; 415 when the
            extension is not in ``ALLOWED_EXTS``, or when both the declared
            content type and the type guessed from the filename are present
            and neither is on the MIME allow-lists.
    """
    if not file or not file.filename:
        raise HTTPException(status_code=400, detail="파일이 필요합니다.")

    ext = Path(file.filename).suffix.lower()
    if ext not in ALLOWED_EXTS:
        raise HTTPException(
            status_code=415,
            detail=f"허용되지 않는 확장자입니다: {ext}. 허용: {sorted(ALLOWED_EXTS)}",
        )

    content_type = (file.content_type or "").lower().strip()
    guessed = (mimetypes.guess_type(file.filename)[0] or "").lower()

    def _rejected(mime: str) -> bool:
        """True for a non-empty MIME type that is on neither allow-list."""
        if not mime:
            return False
        return not (mime.startswith(ALLOWED_MIME_PREFIXES) or mime in ALLOWED_MIMES)

    # Lenient on purpose: one acceptable (or missing) type is enough to pass.
    if _rejected(content_type) and _rejected(guessed):
        raise HTTPException(
            status_code=415,
            detail=f"오디오 파일만 업로드 가능합니다. content-type={content_type}, guessed={guessed}",
        )
_FILENAME_SAFE_RE = re.compile(r"[^A-Za-z0-9._-]+")


def _safe_filename(name: str) -> str:
    """Turn a client-supplied filename into a safe, length-limited file name.

    Only the final path component is kept (guards against path traversal),
    spaces become underscores and every run of characters outside
    ``A-Za-z0-9._-`` collapses into a single underscore. An empty result
    becomes ``upload.bin``; names longer than 120 characters are cut to at
    most 100 stem characters plus at most 20 suffix characters.
    """
    leaf = Path(name).name.strip().replace(" ", "_")
    cleaned = _FILENAME_SAFE_RE.sub("_", leaf)
    if cleaned == "":
        return "upload.bin"
    if len(cleaned) <= 120:
        return cleaned
    shortened = Path(cleaned)
    return shortened.stem[:100] + shortened.suffix[:20]
async def _save_upload(file: UploadFile, *, file_id: str) -> str:
    """Stream the upload to ``UPLOAD_DIR`` and return the path of the saved file.

    The file is stored as ``<file_id>_<sanitized filename>`` and read in 1 MiB
    chunks so that the size limit is enforced without buffering the whole
    upload in memory.

    If saving does not complete -- the size limit is exceeded, reading fails
    or the request is cancelled -- the partially written file is removed
    after its handle has been closed, and the error is re-raised.

    Raises:
        HTTPException: 413 when the upload is larger than ``MAX_UPLOAD_BYTES``.
    """
    UPLOAD_DIR.mkdir(parents=True, exist_ok=True)
    safe = _safe_filename(file.filename or "upload.bin")
    out_path = UPLOAD_DIR / f"{file_id}_{safe}"

    received = 0
    try:
        with open(out_path, "wb") as out:
            while True:
                chunk = await file.read(1024 * 1024)
                if not chunk:
                    break
                received += len(chunk)
                if received > MAX_UPLOAD_BYTES:
                    raise HTTPException(
                        status_code=413,
                        detail=f"파일이 너무 큽니다. 최대 {MAX_UPLOAD_MB}MB 까지 업로드 가능합니다.",
                    )
                out.write(chunk)
    except BaseException:
        # The "with" block has closed the handle by now, so the removal also
        # works on platforms that refuse to delete open files. BaseException
        # is caught so that task cancellation cleans up as well.
        try:
            os.remove(out_path)
        except OSError:
            pass
        raise
    return str(out_path)
def _persist_job(job: _Job, *, status: str) -> None:
    """Store the job's current transcript in the database (best effort).

    A database failure is logged and otherwise ignored, so that it never
    changes the job state that clients are polling.
    """
    import logging

    try:
        db.insert_record(
            author_id=job.author_id,
            filename=job.filename,
            language_requested=job.language_requested,
            detected_language=job.detected_language,
            language_probability=job.language_probability,
            duration_sec=job.duration_sec,
            status=status,
            text=job.text,
            segments=job.segments,
        )
    except Exception:
        logging.getLogger(__name__).exception(
            "failed to store job %s (%s) in the database", job.job_id, status
        )


def _run_job(job_id: str) -> None:
    """Worker body: transcribe the job's file and publish progress on the job.

    Runs in its own thread. Segments are appended to the job as they arrive,
    the cancel event is checked before each segment is recorded, and the
    final state (completed, cancelled or failed) is stored in the database.
    Returns silently whenever the job is no longer registered in ``_JOBS``.
    The uploaded file is kept on disk (it is not deleted here).
    """
    with _JOBS_LOCK:
        job = _JOBS.get(job_id)
        if job is None:
            return
        job.status = "running"
        job.progress = 0.0
        job.updated_at = time.time()
        # Snapshot the settings so the transcriber is called without the lock.
        tmp_path = job.tmp_path
        language = job.language
        vad_filter = job.vad_filter
        beam_size = job.beam_size

    try:
        segments_iter, info = transcribe_iter(
            tmp_path,
            language=language,
            vad_filter=vad_filter,
            beam_size=beam_size,
        )
        with _JOBS_LOCK:
            job = _JOBS.get(job_id)
            if job is None:
                return
            job.duration_sec = getattr(info, "duration", None)
            job.detected_language = getattr(info, "language", None)
            job.language_probability = getattr(info, "language_probability", None)
            job.updated_at = time.time()

        texts: list[str] = []
        cancelled = False
        # The lock is only held while the job is updated, never while the
        # iterator produces the next segment.
        for s in segments_iter:
            seg_text = (getattr(s, "text", "") or "").strip()
            with _JOBS_LOCK:
                job = _JOBS.get(job_id)
                if job is None:
                    return
                if job.cancel_event.is_set():
                    job.status = "cancelled"
                    job.updated_at = time.time()
                    cancelled = True
                    break
                if not seg_text:
                    continue
                seg = {
                    "start": float(getattr(s, "start", 0.0)),
                    "end": float(getattr(s, "end", 0.0)),
                    "text": seg_text,
                }
                texts.append(seg_text)
                job.segments.append(seg)
                job.text = "\n".join(texts).strip()
                if job.duration_sec and job.duration_sec > 0:
                    # Capped below 1.0; only a completed job reports 100 %.
                    job.progress = max(0.0, min(0.999, seg["end"] / float(job.duration_sec)))
                else:
                    job.progress = None
                job.updated_at = time.time()

        with _JOBS_LOCK:
            job = _JOBS.get(job_id)
            if job is None:
                return
            if cancelled or job.cancel_event.is_set():
                job.status = "cancelled"
            else:
                job.status = "completed"
                job.progress = 1.0
            job.updated_at = time.time()
            final_status = job.status
    except Exception as e:
        with _JOBS_LOCK:
            job = _JOBS.get(job_id)
            if job is None:
                return
            job.status = "failed"
            job.error = str(e)
            job.updated_at = time.time()
        final_status = "failed"

    # Stored for every outcome: completed, cancelled and failed.
    _persist_job(job, status=final_status)

895
app/static/index.html Normal file
View File

@@ -0,0 +1,895 @@
<!doctype html>
<html lang="ko">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<title>Web STT</title>
<style>
*,
*::before,
*::after {
box-sizing: border-box;
}
:root {
color-scheme: dark;
--bg: #0b0d12;
--panel: #121624;
--muted: #9aa4b2;
--text: #e6eaf2;
--accent: #6ea8fe;
--danger: #ff6b6b;
--border: rgba(255, 255, 255, 0.08);
--field-bg: rgba(0, 0, 0, 0.18);
--shadow: 0 18px 45px rgba(0, 0, 0, 0.28);
--bg-grad-1: radial-gradient(1200px 500px at 10% 10%, rgba(110, 168, 254, 0.18), transparent 60%);
--bg-grad-2: radial-gradient(900px 420px at 80% 20%, rgba(130, 231, 171, 0.12), transparent 60%);
}
html[data-theme="light"] {
color-scheme: light;
--bg: #f7f9fc;
--panel: #ffffff;
--muted: #475467;
--text: #101828;
--accent: #2563eb;
--danger: #b42318;
--border: rgba(16, 24, 40, 0.12);
--field-bg: #f2f4f7;
--shadow: 0 18px 45px rgba(16, 24, 40, 0.08);
--bg-grad-1: radial-gradient(1200px 500px at 10% 10%, rgba(37, 99, 235, 0.12), transparent 60%);
--bg-grad-2: radial-gradient(900px 420px at 80% 20%, rgba(22, 163, 74, 0.10), transparent 60%);
}
html[data-theme="dark"] {
color-scheme: dark;
}
body {
margin: 0;
font-family: ui-sans-serif, system-ui, -apple-system, Segoe UI, Roboto, Helvetica, Arial, "Apple SD Gothic Neo",
"Noto Sans KR", "Malgun Gothic", sans-serif;
background: var(--bg-grad-1), var(--bg-grad-2), var(--bg);
color: var(--text);
}
.wrap {
max-width: 1220px;
margin: 0 auto;
padding: 28px 18px 40px;
}
header {
display: flex;
align-items: baseline;
justify-content: space-between;
gap: 16px;
margin-bottom: 18px;
}
.header-actions {
display: flex;
align-items: center;
justify-content: flex-end;
gap: 10px;
}
.tabs {
display: inline-flex;
gap: 8px;
}
.btn.tab {
padding: 8px 10px;
border-radius: 999px;
}
.btn.tab.active {
border-color: color-mix(in oklab, var(--accent) 55%, var(--border));
background: color-mix(in oklab, var(--accent) 12%, rgba(255, 255, 255, 0.06));
}
h1 {
font-size: 18px;
font-weight: 700;
margin: 0;
letter-spacing: 0.2px;
}
.sub {
font-size: 12px;
color: var(--muted);
}
.grid {
display: grid;
grid-template-columns: 1fr;
gap: 14px;
}
@media (min-width: 920px) {
.grid {
grid-template-columns: 360px minmax(0, 1fr);
gap: 16px;
}
}
.card {
background: var(--panel);
border: 1px solid var(--border);
border-radius: 14px;
padding: 14px;
box-shadow: var(--shadow);
}
.row {
display: flex;
gap: 10px;
align-items: center;
flex-wrap: wrap;
}
label {
display: block;
font-size: 12px;
color: var(--muted);
margin: 10px 0 6px;
}
input[type="file"],
input[type="text"],
select {
width: 100%;
padding: 10px 10px;
border-radius: 10px;
border: 1px solid var(--border);
background: var(--field-bg);
color: var(--text);
outline: none;
}
input[type="checkbox"] {
transform: translateY(1px);
}
.btn {
appearance: none;
border: 1px solid var(--border);
background: rgba(255, 255, 255, 0.06);
color: var(--text);
border-radius: 12px;
padding: 10px 12px;
font-weight: 650;
cursor: pointer;
}
.btn.primary {
background: linear-gradient(180deg, rgba(110, 168, 254, 0.28), rgba(110, 168, 254, 0.16));
border-color: color-mix(in oklab, var(--accent) 55%, var(--border));
}
.btn:disabled {
opacity: 0.5;
cursor: not-allowed;
}
.hint {
margin-top: 10px;
font-size: 12px;
color: var(--muted);
line-height: 1.45;
}
.status {
margin-top: 10px;
font-size: 12px;
color: var(--muted);
}
.progress {
margin-top: 10px;
display: flex;
align-items: center;
gap: 10px;
}
progress {
width: 100%;
height: 12px;
}
.err {
color: var(--danger);
white-space: pre-wrap;
}
textarea {
width: 100%;
min-height: 440px;
resize: vertical;
padding: 12px;
border-radius: 12px;
border: 1px solid var(--border);
background: var(--field-bg);
color: var(--text);
line-height: 1.55;
}
#out {
min-height: 560px;
}
#segments {
min-height: 260px;
}
.mono {
font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace;
}
.small {
font-size: 12px;
color: var(--muted);
}
.pill {
display: inline-flex;
align-items: center;
gap: 6px;
border: 1px solid var(--border);
border-radius: 999px;
padding: 6px 10px;
font-size: 12px;
color: var(--muted);
background: rgba(255, 255, 255, 0.04);
}
table {
width: 100%;
border-collapse: collapse;
font-size: 12px;
}
tbody tr.record-row {
cursor: pointer;
}
tbody tr.record-row:hover {
background: color-mix(in oklab, var(--accent) 6%, transparent);
}
tbody tr.record-row.selected {
background: color-mix(in oklab, var(--accent) 10%, transparent);
}
th,
td {
border-bottom: 1px solid var(--border);
padding: 10px 8px;
vertical-align: top;
}
th {
text-align: left;
color: var(--muted);
font-weight: 700;
}
.nowrap {
white-space: nowrap;
}
.truncate {
max-width: 320px;
overflow: hidden;
text-overflow: ellipsis;
white-space: nowrap;
}
.view {
display: none;
}
.view.active {
display: block;
}
</style>
</head>
<body>
<div class="wrap">
<header>
<div>
<h1>Web STT</h1>
<div class="sub">mp3/m4a 등 음성파일 업로드 → 텍스트 변환</div>
</div>
<div class="header-actions">
<div class="tabs">
<button class="btn tab active" id="tab-stt" type="button">전사</button>
<button class="btn tab" id="tab-admin" type="button">관리</button>
</div>
<button class="btn" id="theme" type="button">테마</button>
<div class="pill" id="health">서버 상태 확인 중…</div>
</div>
</header>
<div id="view-stt" class="view active">
<div class="grid">
<section class="card">
<label>음성 파일</label>
<input id="file" type="file" accept="audio/*,.m4a,.mp3,.wav,.mp4,.aac,.ogg,.flac,.webm" />
<label>작성자 이메일(author_id)</label>
<input id="author" type="text" value="dosangyoon@gmail.com" placeholder="예: user@example.com" />
<div class="row">
<div style="flex: 1 1 160px">
<label>언어</label>
<select id="language">
<option value="ko" selected>ko (한국어, 기본)</option>
<option value="en">en (English)</option>
<option value="ja">ja (日本語)</option>
<option value="zh">zh (中文)</option>
<option value="auto">자동 감지</option>
</select>
</div>
<div style="flex: 0 0 140px">
<label>beam size</label>
<select id="beam">
<option value="1">1 (빠름)</option>
<option value="3">3</option>
<option value="5" selected>5 (기본)</option>
<option value="8">8</option>
</select>
</div>
</div>
<label>
<input id="vad" type="checkbox" checked />
VAD 필터 (무음 구간 감소)
</label>
<div class="row" style="margin-top: 12px">
<button class="btn primary" id="go" disabled>전사(STT) 실행</button>
<button class="btn" id="cancel" disabled>취소</button>
<button class="btn" id="download" disabled>TXT 다운로드</button>
<button class="btn" id="clear">초기화</button>
</div>
<div class="hint">
- 허용: mp3, m4a, wav, mp4, aac, ogg, flac, webm<br />
- 첫 실행 시 Whisper 모델 다운로드로 시간이 걸릴 수 있습니다.
</div>
<div class="progress">
<progress id="prog" max="1" value="0"></progress>
<div class="small mono" id="progText">0%</div>
</div>
<div class="status" id="status"></div>
<div class="status err" id="error"></div>
</section>
<section class="card">
<div class="row" style="justify-content: space-between">
<div class="small" id="meta">결과 대기 중</div>
<div class="small mono" id="timing"></div>
</div>
<label>전사 결과</label>
<textarea id="out" class="mono" placeholder="여기에 결과가 표시됩니다." spellcheck="false"></textarea>
<label>세그먼트(JSON)</label>
<textarea id="segments" class="mono" placeholder="세그먼트가 여기에 표시됩니다." spellcheck="false"></textarea>
</section>
</div>
</div>
<div id="view-admin" class="view">
<div class="grid">
<section class="card">
<div class="row" style="justify-content: space-between">
<div style="flex: 1 1 200px">
<label>필터(파일명/텍스트)</label>
<input id="admin-q" type="text" placeholder="검색어" />
</div>
<div style="flex: 1 1 200px">
<label>author_id</label>
<input id="admin-author" type="text" value="dosangyoon@gmail.com" />
</div>
</div>
<div class="row" style="margin-top: 10px">
<button class="btn primary" id="admin-refresh" type="button">목록 새로고침</button>
<button class="btn" id="admin-clear-filter" type="button">필터 초기화</button>
</div>
<div class="status" id="admin-status"></div>
<div style="margin-top: 10px; overflow: auto; max-height: 520px">
<table>
<thead>
<tr>
<th class="nowrap">id</th>
<th>파일</th>
<th class="nowrap">상태</th>
<th class="nowrap">작성자</th>
<th class="nowrap">생성</th>
<th class="nowrap">작업</th>
</tr>
</thead>
<tbody id="admin-tbody"></tbody>
</table>
</div>
</section>
<section class="card">
<div class="row" style="justify-content: space-between">
<div class="small" id="admin-meta">레코드 선택 없음</div>
<div class="row">
<button class="btn primary" id="admin-save" type="button" disabled>저장(수정)</button>
<button class="btn" id="admin-delete" type="button" disabled>삭제</button>
</div>
</div>
<label>author_id</label>
<input id="admin-edit-author" type="text" />
<label>status</label>
<select id="admin-edit-status">
<option value="completed">completed</option>
<option value="cancelled">cancelled</option>
<option value="failed">failed</option>
<option value="running">running</option>
<option value="queued">queued</option>
</select>
<label>text</label>
<textarea id="admin-edit-text" class="mono" style="min-height: 460px" spellcheck="false"></textarea>
</section>
</div>
</div>
</div>
<script>
// Shorthand for document.getElementById.
const $ = (id) => document.getElementById(id);
// Header controls and the two switchable views.
const themeEl = $("theme");
const tabSttEl = $("tab-stt");
const tabAdminEl = $("tab-admin");
const viewSttEl = $("view-stt");
const viewAdminEl = $("view-admin");
// Transcription form, result and progress elements.
const fileEl = $("file");
const authorEl = $("author");
const goEl = $("go");
const cancelEl = $("cancel");
const outEl = $("out");
const segEl = $("segments");
const errEl = $("error");
const statusEl = $("status");
const progEl = $("prog");
const progTextEl = $("progText");
const downloadEl = $("download");
const clearEl = $("clear");
const healthEl = $("health");
const metaEl = $("meta");
const timingEl = $("timing");
// Extensions accepted by the client-side check in the file input handler.
const allowedExt = [".mp3", ".m4a", ".wav", ".mp4", ".aac", ".ogg", ".flac", ".webm"];
// Id of the job currently being polled, or null.
let currentJobId = null;
// Handle of the polling interval, or null when not polling.
let pollTimer = null;
// performance.now() timestamp taken when the current run was started.
let startedAt = null;
// AbortController of an upload that is still in flight, or null.
let uploadController = null;
// Sizes last rendered, used to skip redundant textarea updates while polling.
let lastSegCount = 0;
let lastTextLen = 0;
// Tabs
// Activate the admin view for "admin" and the transcription view for any
// other value; switching to the admin view also reloads the record list.
function setTab(which) {
  const showAdmin = which === "admin";
  const targets = [
    [viewSttEl, !showAdmin],
    [viewAdminEl, showAdmin],
    [tabSttEl, !showAdmin],
    [tabAdminEl, showAdmin],
  ];
  for (const [el, active] of targets) {
    el.classList.toggle("active", active);
  }
  if (showAdmin) {
    adminRefresh();
  }
}
tabSttEl.addEventListener("click", () => {
  setTab("stt");
});
tabAdminEl.addEventListener("click", () => {
  setTab("admin");
});
// Apply "light" or (for any other value) "dark" to the document and update
// the label of the theme button when that button exists.
function applyTheme(theme) {
  const resolved = theme === "light" ? "light" : "dark";
  document.documentElement.dataset.theme = resolved;
  if (!themeEl) return;
  themeEl.textContent = resolved === "dark" ? "테마: 다크" : "테마: 라이트";
}
// Restore the theme stored by an earlier visit; dark is the default.
const savedTheme = localStorage.getItem("theme");
applyTheme(savedTheme || "dark");
if (themeEl) {
  themeEl.addEventListener("click", () => {
    const current = document.documentElement.dataset.theme || "dark";
    const toggled = current === "dark" ? "light" : "dark";
    localStorage.setItem("theme", toggled);
    applyTheme(toggled);
  });
}
// Show an error message; a falsy value clears it.
function setError(msg) {
  errEl.textContent = msg ? msg : "";
}
// Show a status message; a falsy value clears it.
function setStatus(msg) {
  statusEl.textContent = msg ? msg : "";
}
// Render progress: a finite number is clamped to 0..1 and shown as a
// percentage; anything else switches the bar to its indeterminate state.
function setProgress(p) {
  const determinate = typeof p === "number" && Number.isFinite(p);
  if (!determinate) {
    // indeterminate
    progEl.removeAttribute("value");
    progTextEl.textContent = "…";
    return;
  }
  const clamped = Math.min(1, Math.max(0, p));
  progEl.value = clamped;
  progEl.removeAttribute("data-indeterminate");
  progTextEl.textContent = `${Math.round(clamped * 100)}%`;
}
// Button state while nothing is running: start needs a selected file,
// download needs a non-blank transcript, cancel is unavailable.
function setIdle() {
  const hasFile = Boolean(fileEl.files?.length);
  const hasText = Boolean(outEl.value?.trim());
  fileEl.disabled = false;
  cancelEl.disabled = true;
  goEl.disabled = !hasFile;
  downloadEl.disabled = !hasText;
}
// Button state while the upload is being sent: only cancel is enabled.
function setStarting() {
  for (const el of [fileEl, goEl, downloadEl]) {
    el.disabled = true;
  }
  cancelEl.disabled = false;
}
// Button state while a job is queued or running: only cancel is enabled.
function setRunning() {
  for (const el of [fileEl, goEl, downloadEl]) {
    el.disabled = true;
  }
  cancelEl.disabled = false;
}
// Query the health endpoint and show the outcome in the header pill; a
// network error and a non-OK response are both reported as "no response".
async function checkHealth() {
  let healthy = false;
  try {
    const res = await fetch("healthz");
    healthy = res.ok;
  } catch {
    healthy = false;
  }
  healthEl.textContent = healthy ? "서버 정상" : "서버 미응답";
}
// File selection: reset messages, validate the extension on the client and
// enable the start button only for an allowed file.
fileEl.addEventListener("change", () => {
  setError("");
  setStatus("");
  // Nothing is running right after a selection, so cancel stays disabled.
  cancelEl.disabled = true;
  const picked = fileEl.files?.[0];
  if (!picked) {
    goEl.disabled = true;
    return;
  }
  const lowered = (picked.name || "").toLowerCase();
  const allowed = allowedExt.some((ext) => lowered.endsWith(ext));
  if (!allowed) {
    setError(`허용되지 않는 파일 확장자입니다.\n허용: ${allowedExt.join(", ")}`);
    goEl.disabled = true;
    return;
  }
  goEl.disabled = false;
  setStatus(`선택됨: ${picked.name} (${Math.round(picked.size / 1024)} KB)`);
});
// Reset button: request cancellation of whatever is in flight, then return
// the form, the outputs and the buttons to their initial state.
clearEl.addEventListener("click", () => {
  cancelCurrent();
  for (const field of [fileEl, outEl, segEl]) {
    field.value = "";
  }
  setError("");
  setStatus("");
  metaEl.textContent = "결과 대기 중";
  timingEl.textContent = "";
  setProgress(0);
  for (const button of [goEl, cancelEl, downloadEl]) {
    button.disabled = true;
  }
});
// Download button: offer the transcript as "transcript.txt" through a
// temporary object URL and a temporary link element.
downloadEl.addEventListener("click", () => {
  const blob = new Blob([outEl.value || ""], { type: "text/plain;charset=utf-8" });
  const objectUrl = URL.createObjectURL(blob);
  const link = document.createElement("a");
  link.href = objectUrl;
  link.download = "transcript.txt";
  document.body.appendChild(link);
  link.click();
  link.remove();
  URL.revokeObjectURL(objectUrl);
});
cancelEl.addEventListener("click", () => {
  cancelCurrent();
});
// Cancel whatever is in flight: abort an upload that is still being sent,
// otherwise ask the server to cancel the current job (if there is one).
async function cancelCurrent() {
  const pendingUpload = uploadController;
  if (pendingUpload) {
    try {
      pendingUpload.abort();
    } catch {}
    uploadController = null;
    setStatus("업로드 취소됨");
    setIdle();
    return;
  }
  if (!currentJobId) {
    return;
  }
  try {
    setStatus("취소 요청…");
    const cancelUrl = `api/jobs/${encodeURIComponent(currentJobId)}/cancel`;
    await fetch(cancelUrl, { method: "POST" });
  } catch (e) {
    setError(String(e?.message || e));
  }
}
// Stop the polling interval if one is active.
function stopPolling() {
  if (!pollTimer) return;
  clearInterval(pollTimer);
  pollTimer = null;
}
// Fetch the current job state once, render it, and leave polling mode when
// the job reached a final state. Throws when the request is not successful.
async function pollJobOnce() {
  if (!currentJobId) return;
  const res = await fetch(`api/jobs/${encodeURIComponent(currentJobId)}`);
  const job = await res.json().catch(() => ({}));
  if (!res.ok) {
    throw new Error(job?.detail || `HTTP ${res.status}`);
  }

  setProgress(job.progress);

  // Only touch the textareas when the amount of content changed.
  const text = job.text || "";
  if (text.length !== lastTextLen) {
    outEl.value = text;
    lastTextLen = text.length;
  }
  const segs = Array.isArray(job.segments) ? job.segments : [];
  if (segs.length !== lastSegCount) {
    segEl.value = JSON.stringify(segs, null, 2);
    lastSegCount = segs.length;
  }

  const lang = job.detected_language ? `${job.detected_language}` : "-";
  const prob = typeof job.language_probability === "number" ? job.language_probability.toFixed(3) : "-";
  const dur = typeof job.duration_sec === "number" ? `${job.duration_sec.toFixed(1)}s` : "-";
  metaEl.textContent = `감지 언어: ${lang} (p=${prob}), 오디오 길이: ${dur}`;
  if (startedAt) {
    const elapsedSec = (performance.now() - startedAt) / 1000;
    timingEl.textContent = `${elapsedSec.toFixed(2)}s`;
  }

  // Shared tail of every final state: forget the job and unlock the form.
  const release = () => {
    currentJobId = null;
    uploadController = null;
    setIdle();
  };

  switch (job.status) {
    case "completed":
      stopPolling();
      setStatus("완료");
      setProgress(1);
      release();
      downloadEl.disabled = !outEl.value?.trim();
      return;
    case "cancelled":
      stopPolling();
      setStatus("취소됨");
      release();
      return;
    case "failed":
      stopPolling();
      setStatus("실패");
      setError(job?.error || "실패");
      release();
      return;
    default:
      // running/queued
      setRunning();
  }
}
// Start button: upload the selected file, create a job, then poll it until
// it reaches a terminal state.
goEl.addEventListener("click", async () => {
  const f = fileEl.files?.[0];
  if (!f) return;

  // Reset per-run state and the UI before starting.
  stopPolling();
  currentJobId = null;
  lastSegCount = 0;
  lastTextLen = 0;
  startedAt = performance.now();
  setStarting();
  setError("");
  setStatus("업로드/작업 생성 중…");
  metaEl.textContent = "처리 중…";
  timingEl.textContent = "";
  outEl.value = "";
  segEl.value = "";
  setProgress(0);

  try {
    const fd = new FormData();
    fd.append("file", f);
    const language = $("language").value;
    if (language) fd.append("language", language);
    const author = (authorEl?.value || "").trim();
    if (author) fd.append("author_id", author);
    fd.append("vad_filter", $("vad").checked ? "true" : "false");
    fd.append("beam_size", $("beam").value);

    // The controller lets cancelCurrent() abort the upload while it is in flight.
    uploadController = new AbortController();
    const r = await fetch("api/jobs", { method: "POST", body: fd, signal: uploadController.signal });
    const body = await r.json().catch(() => ({}));
    if (!r.ok) {
      throw new Error(body?.detail || `HTTP ${r.status}`);
    }
    if (!body.job_id) {
      // Without an id there is nothing to poll; fail loudly instead of
      // starting a timer that can never make progress.
      throw new Error("서버 응답에 job_id가 없습니다.");
    }

    currentJobId = body.job_id;
    uploadController = null;
    setStatus("전사(STT) 처리 중…");
    setRunning();

    await pollJobOnce();

    // The first poll may already have reached a terminal state, which clears
    // currentJobId; only keep polling while a job is still active.
    if (currentJobId) {
      pollTimer = setInterval(() => {
        pollJobOnce().catch((e) => {
          stopPolling();
          setError(String(e?.message || e));
          setStatus("실패");
          currentJobId = null;
          uploadController = null;
          setIdle();
        });
      }, 700);
    }
  } catch (e) {
    const msg = String(e?.message || e);
    // An aborted fetch rejects with a DOMException whose *name* is
    // "AbortError"; its message text does not contain that word.
    if (e?.name === "AbortError" || msg.includes("AbortError")) {
      setStatus("업로드 취소됨");
    } else {
      setError(msg);
      setStatus("실패");
      metaEl.textContent = "오류";
    }
    // Any failure on this path (including the very first poll) ends local
    // tracking of the job so that `finally` returns the UI to idle.
    currentJobId = null;
  } finally {
    if (!currentJobId) {
      uploadController = null;
      setIdle();
    }
  }
});
// Run one health check right away, then repeat it every 5000 ms.
// (checkHealth is declared outside this part of the script.)
checkHealth();
setInterval(checkHealth, 5000);
// Admin (DB Records)
// Element handles for the admin panel. `$` is presumably an element-by-id
// lookup helper defined earlier in this script — confirm.
// List filters, list controls and the records table body:
const adminQEl = $("admin-q");
const adminAuthorEl = $("admin-author");
const adminRefreshEl = $("admin-refresh");
const adminClearFilterEl = $("admin-clear-filter");
const adminStatusEl = $("admin-status");
const adminTbodyEl = $("admin-tbody");
// Editor for the currently opened record:
const adminMetaEl = $("admin-meta");
const adminSaveEl = $("admin-save");
const adminDeleteEl = $("admin-delete");
const adminEditAuthorEl = $("admin-edit-author");
const adminEditStatusEl = $("admin-edit-status");
const adminEditTextEl = $("admin-edit-text");
// Numeric id of the record loaded into the editor, or null when none is open.
let selectedRecordId = null;
/** Show `msg` in the admin status line; any falsy value clears it. */
function adminSetStatus(msg) {
  const shown = msg ? msg : "";
  adminStatusEl.textContent = shown;
}
/**
 * Format a timestamp for display in the user's locale.
 *
 * @param {*} s Value accepted by the Date constructor (e.g. an ISO string).
 * @returns {string} `toLocaleString()` of the parsed date; the input itself
 *   (stringified) when it cannot be parsed; "" for null, undefined or "".
 */
function fmtDate(s) {
  // `new Date(null)` is the Unix epoch, so a missing timestamp would be
  // rendered as a 1970 date; treat "no value" as blank instead.
  if (s === null || s === undefined || s === "") return "";
  try {
    const d = new Date(s);
    if (Number.isNaN(d.getTime())) return String(s || "");
    return d.toLocaleString();
  } catch {
    // Fall back to the raw value if it cannot even be coerced to a date.
    return String(s || "");
  }
}
/**
 * Reload the admin record list using the current filter inputs.
 *
 * Requests up to 50 records (optionally filtered by free-text `q` and
 * `author_id`), rebuilds the table body and reports the count in the admin
 * status line. Errors are shown in the status line; the function never
 * rejects.
 *
 * Rows are built with DOM APIs and `textContent` rather than an HTML string:
 * filename, author and status come from stored data and must never be
 * interpreted as markup.
 */
async function adminRefresh() {
  adminSetStatus("불러오는 중…");
  try {
    const q = (adminQEl.value || "").trim();
    const author = (adminAuthorEl.value || "").trim();
    const params = new URLSearchParams();
    params.set("limit", "50");
    if (q) params.set("q", q);
    if (author) params.set("author_id", author);

    const r = await fetch(`api/records?${params.toString()}`);
    const body = await r.json().catch(() => ({}));
    if (!r.ok) throw new Error(body?.detail || `HTTP ${r.status}`);
    const items = Array.isArray(body.items) ? body.items : [];

    // Build a <td> whose content (and optional tooltip) is plain text.
    const makeCell = (className, text, title) => {
      const td = document.createElement("td");
      td.className = className;
      td.textContent = String(text);
      if (title !== undefined) td.title = String(title);
      return td;
    };
    // Build one of the per-row action buttons handled by the tbody listener.
    const makeButton = (act, id, label) => {
      const btn = document.createElement("button");
      btn.className = "btn";
      btn.type = "button";
      btn.dataset.act = act;
      btn.dataset.id = id;
      btn.textContent = label;
      return btn;
    };

    const rows = document.createDocumentFragment();
    for (const it of items) {
      const id = String(it.id);
      const tr = document.createElement("tr");
      tr.className = "record-row";
      tr.dataset.id = id;
      // Keep the open record highlighted across list reloads.
      tr.classList.toggle("selected", selectedRecordId !== null && Number(id) === selectedRecordId);

      const actions = makeCell("nowrap", "");
      // The " " text node keeps the inline gap between the two buttons.
      actions.append(makeButton("open", id, "열기"), " ", makeButton("del", id, "삭제"));

      tr.append(
        makeCell("nowrap mono", id),
        makeCell("truncate", it.filename || "-", it.filename || ""),
        makeCell("nowrap", it.status || "-"),
        makeCell("truncate", it.author_id || "-", it.author_id || ""),
        makeCell("nowrap", fmtDate(it.created_at)),
        actions
      );
      rows.appendChild(tr);
    }

    adminTbodyEl.innerHTML = "";
    adminTbodyEl.appendChild(rows);
    adminSetStatus(`${body.total ?? items.length}개 (상위 ${items.length}개 표시)`);
  } catch (e) {
    adminSetStatus(`오류: ${String(e?.message || e)}`);
  }
}
/**
 * Select record `id`, highlight its row and load it into the editor.
 *
 * Save/Delete stay disabled until the record has loaded. If another record
 * is selected (or the selection is cleared) while the request is in flight,
 * the late response is discarded so the editor can never show one record's
 * content under another record's id.
 *
 * @param {string|number} id Record id; converted with Number().
 */
async function adminOpen(id) {
  const rid = Number(id);
  selectedRecordId = rid;
  // Object.is so that a NaN id still compares equal to itself.
  const isStale = () => !Object.is(selectedRecordId, rid);

  // selection highlight
  for (const row of adminTbodyEl.querySelectorAll("tr.record-row")) {
    row.classList.toggle("selected", Number(row.dataset.id) === rid);
  }
  adminMetaEl.textContent = `레코드 #${rid} 로딩…`;
  adminSaveEl.disabled = true;
  adminDeleteEl.disabled = true;

  try {
    const r = await fetch(`api/records/${encodeURIComponent(String(rid))}`);
    const body = await r.json().catch(() => ({}));
    if (isStale()) return;
    if (!r.ok) throw new Error(body?.detail || `HTTP ${r.status}`);

    adminEditAuthorEl.value = body.author_id || "";
    adminEditStatusEl.value = body.status || "completed";
    adminEditTextEl.value = body.text || "";
    adminMetaEl.textContent = `레코드 #${rid} (${body.filename || "-"})`;
    adminSaveEl.disabled = false;
    adminDeleteEl.disabled = false;
  } catch (e) {
    // An error for a superseded request must not overwrite the newer state.
    if (isStale()) return;
    adminMetaEl.textContent = `오류: ${String(e?.message || e)}`;
  }
}
/**
 * Save the editor's author, status and text to the selected record via PUT,
 * then reload the list. Does nothing when no record is selected; failures
 * are reported in the admin status line.
 */
async function adminSave() {
  if (!selectedRecordId) return;
  adminSetStatus("저장 중…");
  try {
    const endpoint = `api/records/${encodeURIComponent(String(selectedRecordId))}`;
    const response = await fetch(endpoint, {
      method: "PUT",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({
        author_id: (adminEditAuthorEl.value || "").trim(),
        status: adminEditStatusEl.value,
        text: adminEditTextEl.value || "",
      }),
    });
    const saved = await response.json().catch(() => ({}));
    if (!response.ok) {
      throw new Error(saved?.detail || `HTTP ${response.status}`);
    }
    adminSetStatus("저장 완료");
    adminMetaEl.textContent = `레코드 #${selectedRecordId} (${saved.filename || "-"})`;
    await adminRefresh();
  } catch (err) {
    adminSetStatus(`오류: ${String(err?.message || err)}`);
  }
}
/**
 * Delete a record after user confirmation, then reload the list.
 *
 * @param {string|number|null} id Record to delete; when falsy, the record
 *   currently open in the editor is used. If the deleted record is the open
 *   one, the editor is cleared and its buttons are disabled.
 */
async function adminDelete(id = null) {
  const rid = id ? Number(id) : selectedRecordId;
  // The confirm dialog is only shown when there is a record to delete.
  if (!rid || !confirm(`레코드 #${rid} 를 삭제할까요?`)) return;

  adminSetStatus("삭제 중…");
  try {
    const endpoint = `api/records/${encodeURIComponent(String(rid))}`;
    const response = await fetch(endpoint, { method: "DELETE" });
    const result = await response.json().catch(() => ({}));
    if (!response.ok) {
      throw new Error(result?.detail || `HTTP ${response.status}`);
    }
    adminSetStatus("삭제 완료");

    const deletedOpenRecord = selectedRecordId === rid;
    if (deletedOpenRecord) {
      selectedRecordId = null;
      adminMetaEl.textContent = "레코드 선택 없음";
      adminEditAuthorEl.value = "";
      adminEditTextEl.value = "";
      adminSaveEl.disabled = true;
      adminDeleteEl.disabled = true;
    }
    await adminRefresh();
  } catch (err) {
    adminSetStatus(`오류: ${String(err?.message || err)}`);
  }
}
// Admin toolbar wiring.
adminRefreshEl.addEventListener("click", function onAdminRefreshClick() {
  adminRefresh();
});

// "Clear filter" empties the search box, sets the author filter to a fixed
// address and reloads the list.
// NOTE(review): the author value is hard-coded here — confirm it is meant to
// be the default filter rather than an empty string.
adminClearFilterEl.addEventListener("click", function onAdminClearFilterClick() {
  adminQEl.value = "";
  adminAuthorEl.value = "dosangyoon@gmail.com";
  adminRefresh();
});

// Editor buttons act on the currently selected record.
adminSaveEl.addEventListener("click", function onAdminSaveClick() {
  adminSave();
});
adminDeleteEl.addEventListener("click", function onAdminDeleteClick() {
  adminDelete();
});
// Delegated click handling for the records table: action buttons take
// precedence, any other click inside a row opens that record.
adminTbodyEl.addEventListener("click", (event) => {
  const actionButton = event.target?.closest?.("button[data-act]");
  if (actionButton) {
    const recordId = actionButton.getAttribute("data-id");
    switch (actionButton.getAttribute("data-act")) {
      case "open":
        adminOpen(recordId);
        break;
      case "del":
        adminDelete(recordId);
        break;
    }
    // Button clicks never fall through to the row-click behaviour.
    return;
  }

  // A click elsewhere in a row also opens the record.
  const rowId = event.target?.closest?.("tr.record-row")?.dataset?.id;
  if (rowId) adminOpen(rowId);
});
</script>
</body>
</html>

86
app/stt.py Normal file
View File

@@ -0,0 +1,86 @@
from __future__ import annotations
import os
from dataclasses import dataclass
from typing import Any, Iterable, Tuple
from faster_whisper import WhisperModel
@dataclass(frozen=True)
class SegmentOut:
    """Immutable record for one transcribed segment."""

    # Segment boundaries as floats (presumably seconds from the start of the
    # audio -- confirm against the transcription backend).
    start: float
    end: float
    # Segment text.
    text: str
# Module-wide model instance; stays None until _get_model() first creates it.
_MODEL: WhisperModel | None = None


def _get_model() -> WhisperModel:
    """Return the shared WhisperModel, constructing it on first use.

    The model is configured from the environment:
    ``APP_WHISPER_MODEL`` (default ``"small"``), ``APP_WHISPER_DEVICE``
    (default ``"cpu"``) and ``APP_WHISPER_COMPUTE_TYPE`` (default ``"int8"``).
    Later calls return the same instance without reading the environment.
    """
    global _MODEL
    if _MODEL is None:
        # WhisperModel download/cache handled by faster-whisper internally.
        _MODEL = WhisperModel(
            os.getenv("APP_WHISPER_MODEL", "small"),
            device=os.getenv("APP_WHISPER_DEVICE", "cpu"),
            compute_type=os.getenv("APP_WHISPER_COMPUTE_TYPE", "int8"),
        )
    return _MODEL
def transcribe_iter(
    audio_path: str,
    *,
    language: str | None = None,
    vad_filter: bool = True,
    beam_size: int = 5,
) -> Tuple[Iterable[Any], Any]:
    """Run the shared model on *audio_path* and return its raw result.

    Args:
        audio_path: Path of the audio file handed to the model.
        language: Language passed through to the model; ``None`` is
            forwarded unchanged.
        vad_filter: Forwarded to the model's ``vad_filter`` option.
        beam_size: Forwarded to the model's ``beam_size`` option.

    Returns:
        The ``(segments, info)`` pair produced by ``model.transcribe``.
    """
    decode_options = {
        "language": language,
        "vad_filter": vad_filter,
        "beam_size": beam_size,
    }
    segments, info = _get_model().transcribe(audio_path, **decode_options)
    return segments, info
def transcribe_file(
    audio_path: str,
    *,
    language: str | None = None,
    vad_filter: bool = True,
    beam_size: int = 5,
) -> dict[str, Any]:
    """Transcribe *audio_path* completely and return a plain-dict result.

    Segments whose text is empty after stripping are dropped.

    Returns:
        A dict with keys ``text`` (kept segment texts joined by newlines),
        ``segments`` (list of ``{"start", "end", "text"}`` dicts),
        ``detected_language``, ``language_probability`` and ``duration_sec``.
        The last three are read from the info object and are ``None`` when
        the corresponding attribute is missing.
    """
    raw_segments, info = transcribe_iter(
        audio_path,
        language=language,
        vad_filter=vad_filter,
        beam_size=beam_size,
    )

    kept: list[SegmentOut] = []
    for raw in _iter_segments(raw_segments):
        candidate = SegmentOut(
            start=float(raw.start),
            end=float(raw.end),
            text=(raw.text or "").strip(),
        )
        if not candidate.text:
            # Nothing was recognised in this segment.
            continue
        kept.append(candidate)

    return {
        "text": "\n".join(seg.text for seg in kept).strip(),
        "segments": [vars(seg) for seg in kept],
        "detected_language": getattr(info, "language", None),
        "language_probability": getattr(info, "language_probability", None),
        "duration_sec": getattr(info, "duration", None),
    }
def _iter_segments(segments_iter: Iterable[Any]) -> Iterable[Any]:
    """Lazily yield every item of *segments_iter*, unchanged and in order."""
    yield from segments_iter