commit 326b749ca841047d8b7ceebbe4e31ef0c988dc6f Author: dsyoon Date: Wed Feb 25 19:04:18 2026 +0900 Fresh start on MariaDB Gitea diff --git a/.env b/.env new file mode 100644 index 0000000..d741a33 --- /dev/null +++ b/.env @@ -0,0 +1,7 @@ +DB_HOST=ncue.net +DB_PORT=5432 +DB_NAME=tts +DB_USER=ncue +DB_PASSWORD=ncue5004! +TTS_ENGINE=mms +MMS_MODEL=facebook/mms-tts-kor \ No newline at end of file diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..97027d5 --- /dev/null +++ b/.env.example @@ -0,0 +1,7 @@ +DB_HOST=ncue.net +DB_PORT=5432 +DB_NAME=tts +DB_USER=ncue +DB_PASSWORD=your_db_password +TTS_ENGINE=mms +MMS_MODEL=facebook/mms-tts-kor diff --git a/README.md b/README.md new file mode 100644 index 0000000..86e9369 --- /dev/null +++ b/README.md @@ -0,0 +1,49 @@ +# TTS 저장/조회/삭제 앱 + +## 프로젝트 구조 +``` +. +├── client +│ ├── static +│ └── templates +├── server +│ ├── db.py +│ ├── main.py +│ └── tts_service.py +├── resources +├── .env +├── .env.example +├── requirements.txt +└── README.md +``` + +## 실행 방법 +1) 의존성 설치 +``` +pip install -r requirements.txt +``` + +2) 환경 변수 설정 +``` +cp .env.example .env +``` +`.env`에 DB 계정 정보를 입력하세요. + +3) 서버 실행 +``` +uvicorn server.main:app --reload +``` + +4) 접속 +``` +http://localhost:8000 +``` + +## 주의 사항 +- PostgreSQL 접속 정보는 프로젝트 루트의 `.env`에서 로드합니다. +- `server/`에서 실행하더라도 루트 `.env`가 적용됩니다. +- 배포 스크립트 기본 포트는 `8019`이며 `PORT`로 변경할 수 있습니다. +- ffmpeg가 설치되어 있어야 합니다. +- mp3 파일은 `resources/` 아래에 저장됩니다. +- 고품질 TTS를 위해 `TTS_ENGINE=mms`를 설정할 수 있습니다. +- MMS 모델(`facebook/mms-tts-kor`)은 비상업(CC-BY-NC-4.0) 라이선스입니다. diff --git a/client/run.sh b/client/run.sh new file mode 100755 index 0000000..b0cfe2a --- /dev/null +++ b/client/run.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash +set -euo pipefail + +cd /home/dsyoon/workspace/tts/client + +echo "Client is served by the FastAPI server." +echo "No build step required." diff --git a/client/static/app.js b/client/static/app.js new file mode 100644 index 0000000..0dbe2e2 --- /dev/null +++ b/client/static/app.js @@ -0,0 +1,216 @@ +const listEl = document.getElementById("tts-list"); +const textInput = document.getElementById("text-input"); +const saveBtn = document.getElementById("save-btn"); +const voiceSelect = document.getElementById("voice-select"); +const editBtn = document.getElementById("edit-btn"); +const deleteBtn = document.getElementById("delete-btn"); +const cancelBtn = document.getElementById("cancel-btn"); +const downloadLink = document.getElementById("download-link"); +const progressWrap = document.getElementById("save-progress"); +const progressBar = document.getElementById("save-progress-bar"); + +let items = []; +let editMode = false; +const selectedIds = new Set(); +let progressTimer = null; +let selectedItemId = null; +let selectedDownloadUrl = null; + +function startProgress() { + let value = 0; + progressWrap.classList.remove("hidden"); + progressBar.style.width = "0%"; + progressBar.setAttribute("aria-valuenow", "0"); + + if (progressTimer) { + clearInterval(progressTimer); + } + + progressTimer = setInterval(() => { + value = Math.min(value + Math.random() * 8 + 2, 90); + progressBar.style.width = `${value}%`; + progressBar.setAttribute("aria-valuenow", `${Math.round(value)}`); + }, 300); +} + +function finishProgress(success = true) { + if (progressTimer) { + clearInterval(progressTimer); + progressTimer = null; + } + + progressBar.style.width = "100%"; + progressBar.setAttribute("aria-valuenow", "100"); + + const delay = success ? 400 : 1200; + setTimeout(() => { + progressBar.style.width = "0%"; + progressBar.setAttribute("aria-valuenow", "0"); + progressWrap.classList.add("hidden"); + }, delay); +} + +function setEditMode(isEdit) { + editMode = isEdit; + selectedIds.clear(); + selectedItemId = null; + selectedDownloadUrl = null; + editBtn.classList.toggle("hidden", editMode); + deleteBtn.classList.toggle("hidden", !editMode); + cancelBtn.classList.toggle("hidden", !editMode); + downloadLink.classList.add("hidden"); + downloadLink.href = "#"; + renderList(); +} + +function renderList() { + listEl.innerHTML = ""; + items.forEach((item) => { + const li = document.createElement("li"); + li.className = "tts-item"; + if (!editMode && selectedItemId === item.id) { + li.classList.add("selected"); + } + + if (editMode) { + const checkbox = document.createElement("input"); + checkbox.type = "checkbox"; + checkbox.checked = selectedIds.has(item.id); + checkbox.addEventListener("click", (event) => { + event.stopPropagation(); + if (checkbox.checked) { + selectedIds.add(item.id); + } else { + selectedIds.delete(item.id); + } + }); + li.appendChild(checkbox); + } else { + const bullet = document.createElement("span"); + bullet.textContent = "•"; + bullet.className = "bullet"; + li.appendChild(bullet); + } + + const label = document.createElement("span"); + label.textContent = item.size_display + ? `${item.display_time} (${item.size_display})` + : item.display_time; + label.className = "item-label"; + li.appendChild(label); + + li.addEventListener("click", () => handleItemClick(item)); + listEl.appendChild(li); + }); +} + +async function loadList() { + const res = await fetch("/api/tts"); + items = await res.json(); + renderList(); +} + +async function handleSave() { + const text = (textInput.value || "").trim(); + const voice = (voiceSelect?.value || "male").trim(); + if (text.length < 11) { + alert("10개 글자 이상이어야 합니다"); + return; + } + + startProgress(); + const res = await fetch("/api/tts", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ text, voice }), + }); + + if (!res.ok) { + const err = await res.json().catch(() => ({})); + alert(err.detail || "저장에 실패했습니다."); + finishProgress(false); + return; + } + + const created = await res.json(); + items.unshift(created); + renderList(); + finishProgress(true); +} + +async function handleItemClick(item) { + if (editMode) { + if (selectedIds.has(item.id)) { + selectedIds.delete(item.id); + } else { + selectedIds.add(item.id); + } + renderList(); + return; + } + + if (selectedItemId === item.id) { + selectedItemId = null; + selectedDownloadUrl = null; + downloadLink.href = "#"; + downloadLink.classList.add("hidden"); + renderList(); + return; + } + + const res = await fetch(`/api/tts/${item.id}`); + if (!res.ok) { + alert("항목을 불러오지 못했습니다."); + return; + } + + const data = await res.json(); + textInput.value = data.text || ""; + selectedItemId = item.id; + selectedDownloadUrl = data.download_url; + downloadLink.href = selectedDownloadUrl; + downloadLink.classList.remove("hidden"); + renderList(); +} + +async function handleDelete() { + const ids = Array.from(selectedIds); + if (ids.length === 0) { + alert("삭제할 항목을 선택하세요."); + return; + } + + const res = await fetch("/api/tts", { + method: "DELETE", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ ids }), + }); + + if (!res.ok) { + alert("삭제에 실패했습니다."); + return; + } + + const data = await res.json(); + const deletedSet = new Set(data.deleted || []); + items = items.filter((item) => !deletedSet.has(item.id)); + textInput.value = ""; + selectedItemId = null; + selectedDownloadUrl = null; + downloadLink.href = "#"; + downloadLink.classList.add("hidden"); + setEditMode(false); +} + +saveBtn.addEventListener("click", handleSave); +editBtn.addEventListener("click", () => setEditMode(true)); +cancelBtn.addEventListener("click", () => setEditMode(false)); +deleteBtn.addEventListener("click", handleDelete); +downloadLink.addEventListener("click", (event) => { + if (!selectedDownloadUrl) { + event.preventDefault(); + alert("다운로드할 항목을 선택하세요."); + } +}); + +loadList(); diff --git a/client/static/styles.css b/client/static/styles.css new file mode 100644 index 0000000..5d0e5f1 --- /dev/null +++ b/client/static/styles.css @@ -0,0 +1,150 @@ +* { + box-sizing: border-box; +} + +body { + margin: 0; + font-family: "Apple SD Gothic Neo", "Malgun Gothic", sans-serif; + background: #f4f4f4; +} + +.container { + display: grid; + grid-template-columns: 2fr 1fr; + gap: 24px; + padding: 24px; + height: 100vh; +} + +.panel { + background: #ffffff; + border: 1px solid #d2d2d2; + padding: 20px; + display: flex; + flex-direction: column; + gap: 16px; +} + +.panel-header { + font-weight: 700; + font-size: 18px; + color: #444; +} + +textarea { + width: 100%; + resize: none; + padding: 12px; + border: 1px solid #c9c9c9; + border-radius: 4px; + font-size: 16px; + line-height: 1.5; + flex: 1; +} + +.save-row { + display: flex; + gap: 12px; + align-items: center; +} + +.voice-select { + border: 1px solid #c9c9c9; + border-radius: 4px; + padding: 10px 12px; + font-size: 14px; + background: #ffffff; + color: #333; +} + +button { + border: none; + padding: 12px 18px; + font-size: 16px; + cursor: pointer; + border-radius: 4px; +} + +button.primary { + background: #1f5f7a; + color: #ffffff; +} + +button.secondary { + background: #4f9acb; + color: #ffffff; +} + +button.danger { + background: #c84040; + color: #ffffff; +} + +.progress-wrap { + width: 100%; + height: 10px; + background: #f1d9a6; + border-radius: 6px; + overflow: hidden; +} + +.progress-bar { + height: 100%; + width: 0%; + background: #f5a623; + transition: width 0.2s ease; +} + +.tts-list { + list-style: none; + padding: 0; + margin: 0; + flex: 1; + overflow-y: auto; +} + +.tts-item { + display: flex; + align-items: center; + gap: 12px; + padding: 10px 6px; + cursor: pointer; + border-bottom: 1px solid #eee; +} + +.tts-item:hover { + background: #f1f6f9; +} + +.tts-item.selected { + background: #f7f7f7; + border-left: 4px solid #e2e2e2; + padding-left: 2px; +} + +.bullet { + font-size: 18px; + color: #555; +} + +.item-label { + font-size: 15px; + color: #333; +} + +.right-actions { + display: flex; + gap: 12px; + justify-content: flex-end; +} + +.hidden { + display: none; +} + +#download-link { + text-align: right; + color: #1f5f7a; + text-decoration: none; + font-size: 14px; +} diff --git a/client/templates/index.html b/client/templates/index.html new file mode 100644 index 0000000..47befe9 --- /dev/null +++ b/client/templates/index.html @@ -0,0 +1,48 @@ + + + + + + 글소리 (TTS, 텍스트→MP3변환) + + + +
+
+
입력 텍스트
+ +
+ + +
+ +
+ +
+
파일리스트
+
    + +
    + + + +
    + +
    +
    + + + + diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..d0b3ec6 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,10 @@ +fastapi +uvicorn +python-dotenv +psycopg2-binary +pyttsx3 +jinja2 +torch +transformers +soundfile +uroman diff --git a/resources/tts_23_20260130_213334.mp3 b/resources/tts_23_20260130_213334.mp3 new file mode 100644 index 0000000..5856020 Binary files /dev/null and b/resources/tts_23_20260130_213334.mp3 differ diff --git a/resources/tts_24_20260130_213410.mp3 b/resources/tts_24_20260130_213410.mp3 new file mode 100644 index 0000000..422e000 Binary files /dev/null and b/resources/tts_24_20260130_213410.mp3 differ diff --git a/server.log b/server.log new file mode 100644 index 0000000..28684bc Binary files /dev/null and b/server.log differ diff --git a/server/__init__.py b/server/__init__.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/server/__init__.py @@ -0,0 +1 @@ + diff --git a/server/__pycache__/__init__.cpython-311.pyc b/server/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000..8f459e3 Binary files /dev/null and b/server/__pycache__/__init__.cpython-311.pyc differ diff --git a/server/__pycache__/db.cpython-311.pyc b/server/__pycache__/db.cpython-311.pyc new file mode 100644 index 0000000..1b0e1f3 Binary files /dev/null and b/server/__pycache__/db.cpython-311.pyc differ diff --git a/server/__pycache__/main.cpython-311.pyc b/server/__pycache__/main.cpython-311.pyc new file mode 100644 index 0000000..aa9e2ec Binary files /dev/null and b/server/__pycache__/main.cpython-311.pyc differ diff --git a/server/__pycache__/tts_service.cpython-311.pyc b/server/__pycache__/tts_service.cpython-311.pyc new file mode 100644 index 0000000..60b7a6f Binary files /dev/null and b/server/__pycache__/tts_service.cpython-311.pyc differ diff --git a/server/db.py b/server/db.py new file mode 100644 index 0000000..18bdcc0 --- /dev/null +++ b/server/db.py @@ -0,0 +1,159 @@ +import os +from typing import List, Optional, Dict, Any + +import psycopg2 +import psycopg2.extras + + +def get_conn(): + user = os.getenv("DB_USER") + password = os.getenv("DB_PASSWORD") + if not user or not password: + raise RuntimeError("DB_USER 또는 DB_PASSWORD가 설정되지 않았습니다.") + + return psycopg2.connect( + host="ncue.net", + port=5432, + dbname="tts", + user=user, + password=password, + ) + + +def init_db(): + with get_conn() as conn: + with conn.cursor() as cur: + cur.execute( + """ + CREATE TABLE IF NOT EXISTS tts_items ( + id SERIAL PRIMARY KEY, + text TEXT NOT NULL, + filename TEXT, + size_bytes BIGINT, + created_at TIMESTAMPTZ NOT NULL DEFAULT NOW() + ); + """ + ) + cur.execute( + """ + ALTER TABLE tts_items + ADD COLUMN IF NOT EXISTS size_bytes BIGINT; + """ + ) + cur.execute( + """ + CREATE INDEX IF NOT EXISTS tts_items_created_at_idx + ON tts_items (created_at DESC); + """ + ) + conn.commit() + + +def create_item(text: str) -> Dict[str, Any]: + with get_conn() as conn: + with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur: + cur.execute( + """ + INSERT INTO tts_items (text) + VALUES (%s) + RETURNING id, created_at; + """, + (text,), + ) + row = cur.fetchone() + conn.commit() + return row + + +def update_filename(tts_id: int, filename: str) -> None: + with get_conn() as conn: + with conn.cursor() as cur: + cur.execute( + """ + UPDATE tts_items + SET filename = %s + WHERE id = %s; + """, + (filename, tts_id), + ) + conn.commit() + + +def update_size_bytes(tts_id: int, size_bytes: int) -> None: + with get_conn() as conn: + with conn.cursor() as cur: + cur.execute( + """ + UPDATE tts_items + SET size_bytes = %s + WHERE id = %s; + """, + (size_bytes, tts_id), + ) + conn.commit() + + +def list_items() -> List[Dict[str, Any]]: + with get_conn() as conn: + with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur: + cur.execute( + """ + SELECT id, created_at, filename, size_bytes + FROM tts_items + ORDER BY created_at DESC; + """ + ) + rows = cur.fetchall() + return rows + + +def get_item(tts_id: int) -> Optional[Dict[str, Any]]: + with get_conn() as conn: + with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur: + cur.execute( + """ + SELECT id, text, filename, size_bytes, created_at + FROM tts_items + WHERE id = %s; + """, + (tts_id,), + ) + row = cur.fetchone() + return row + + +def delete_items(ids: List[int]) -> List[Dict[str, Any]]: + with get_conn() as conn: + with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur: + cur.execute( + """ + SELECT id, filename + FROM tts_items + WHERE id = ANY(%s); + """, + (ids,), + ) + rows = cur.fetchall() + + cur.execute( + """ + DELETE FROM tts_items + WHERE id = ANY(%s); + """, + (ids,), + ) + conn.commit() + return rows + + +def delete_item_by_id(tts_id: int) -> None: + with get_conn() as conn: + with conn.cursor() as cur: + cur.execute( + """ + DELETE FROM tts_items + WHERE id = %s; + """, + (tts_id,), + ) + conn.commit() diff --git a/server/main.py b/server/main.py new file mode 100644 index 0000000..4b93cdd --- /dev/null +++ b/server/main.py @@ -0,0 +1,213 @@ +from pathlib import Path +from typing import List + +import logging + +from dotenv import load_dotenv +from fastapi import FastAPI, HTTPException, Request +from fastapi.middleware.cors import CORSMiddleware +from fastapi.responses import FileResponse +from fastapi.staticfiles import StaticFiles +from fastapi.templating import Jinja2Templates +from pydantic import BaseModel + +from .db import ( + init_db, + create_item, + update_filename, + update_size_bytes, + list_items, + get_item, + delete_items, + delete_item_by_id, +) +from .tts_service import text_to_mp3 + +BASE_DIR = Path(__file__).resolve().parent +ROOT_DIR = BASE_DIR.parent +CLIENT_DIR = ROOT_DIR / "client" +RESOURCES_DIR = ROOT_DIR / "resources" + +# 프로젝트 루트의 .env를 명시적으로 로드 +load_dotenv(dotenv_path=ROOT_DIR / ".env") + +app = FastAPI() +logger = logging.getLogger("tts") + +app.add_middleware( + CORSMiddleware, + allow_origins=["*"], + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], +) + +app.mount("/static", StaticFiles(directory=str(CLIENT_DIR / "static")), name="static") +templates = Jinja2Templates(directory=str(CLIENT_DIR / "templates")) + + +class TtsCreateRequest(BaseModel): + text: str + voice: str | None = None + + +class TtsDeleteRequest(BaseModel): + ids: List[int] + + +def format_display_time(dt): + # 한국 표기 형식으로 변환 + local_dt = dt.astimezone() + return local_dt.strftime("%Y년 %m월 %d일 %H:%M:%S") + + +def ensure_resources_dir(): + # mp3 저장 디렉토리 보장 + RESOURCES_DIR.mkdir(parents=True, exist_ok=True) + + +def format_size(bytes_size: int) -> str: + if bytes_size < 1024: + return f"{bytes_size}B" + if bytes_size < 1024 * 1024: + return f"{bytes_size / 1024:.1f}KB" + return f"{bytes_size / (1024 * 1024):.1f}MB" + + +def get_file_size_display(size_bytes: int | None) -> str | None: + if size_bytes is None: + return None + return format_size(size_bytes) + + +def get_file_size_bytes(filename: str | None) -> int | None: + if not filename: + return None + file_path = RESOURCES_DIR / filename + if not file_path.exists(): + return None + return file_path.stat().st_size + + +@app.on_event("startup") +def on_startup(): + ensure_resources_dir() + init_db() + + +@app.get("/") +def index(request: Request): + return templates.TemplateResponse("index.html", {"request": request}) + + +@app.get("/api/tts") +def api_list_tts(): + rows = list_items() + payload = [] + for row in rows: + size_bytes = row.get("size_bytes") + if size_bytes is None and row.get("filename"): + computed = get_file_size_bytes(row["filename"]) + if computed is not None: + update_size_bytes(row["id"], computed) + size_bytes = computed + payload.append( + { + "id": row["id"], + "created_at": row["created_at"].isoformat(), + "display_time": format_display_time(row["created_at"]), + "filename": row["filename"], + "size_display": get_file_size_display(size_bytes), + } + ) + return payload + + +@app.post("/api/tts") +def api_create_tts(payload: TtsCreateRequest): + text = (payload.text or "").strip() + voice = (payload.voice or "").strip().lower() + if len(text) < 11: + raise HTTPException(status_code=400, detail="텍스트는 11글자 이상이어야 합니다.") + + created = create_item(text) + tts_id = created["id"] + created_at = created["created_at"] + + timestamp = created_at.astimezone().strftime("%Y%m%d_%H%M%S") + filename = f"tts_{tts_id}_{timestamp}.mp3" + mp3_path = RESOURCES_DIR / filename + + try: + text_to_mp3(text=text, mp3_path=str(mp3_path), voice=voice) + except Exception as exc: + logger.exception("TTS 생성 실패") + delete_item_by_id(tts_id) + raise HTTPException(status_code=500, detail=str(exc)) from exc + + size_bytes = get_file_size_bytes(filename) + update_filename(tts_id, filename) + if size_bytes is not None: + update_size_bytes(tts_id, size_bytes) + + return { + "id": tts_id, + "created_at": created_at.isoformat(), + "display_time": format_display_time(created_at), + "filename": filename, + "size_display": get_file_size_display(size_bytes), + } + + +@app.get("/api/tts/{tts_id}") +def api_get_tts(tts_id: int): + row = get_item(tts_id) + if not row: + raise HTTPException(status_code=404, detail="해당 항목이 없습니다.") + + return { + "id": row["id"], + "text": row["text"], + "created_at": row["created_at"].isoformat(), + "display_time": format_display_time(row["created_at"]), + "filename": row["filename"], + "download_url": f"/api/tts/{row['id']}/download", + } + + +@app.get("/api/tts/{tts_id}/download") +def api_download_tts(tts_id: int): + row = get_item(tts_id) + if not row or not row["filename"]: + raise HTTPException(status_code=404, detail="파일이 없습니다.") + + file_path = RESOURCES_DIR / row["filename"] + if not file_path.exists(): + raise HTTPException(status_code=404, detail="파일이 없습니다.") + + return FileResponse( + path=str(file_path), + media_type="audio/mpeg", + filename=row["filename"], + ) + + +@app.delete("/api/tts") +def api_delete_tts(payload: TtsDeleteRequest): + ids = [int(i) for i in payload.ids if isinstance(i, int) or str(i).isdigit()] + if not ids: + raise HTTPException(status_code=400, detail="삭제할 항목이 없습니다.") + + deleted_rows = delete_items(ids) + deleted_ids = [] + for row in deleted_rows: + deleted_ids.append(row["id"]) + if row.get("filename"): + file_path = RESOURCES_DIR / row["filename"] + if file_path.exists(): + try: + file_path.unlink() + except OSError: + pass + + return {"deleted": deleted_ids} diff --git a/server/run.sh b/server/run.sh new file mode 100755 index 0000000..3c765c9 --- /dev/null +++ b/server/run.sh @@ -0,0 +1,19 @@ +#!/usr/bin/env bash +set -euo pipefail + +cd /home/dsyoon/workspace/tts + +CONDA_BASE="/home/dsyoon/workspace/miniconda3" +source "${CONDA_BASE}/bin/activate" tts +export LD_LIBRARY_PATH="${CONDA_PREFIX}/lib:${LD_LIBRARY_PATH:-}" + +PORT="${PORT:-8019}" + +if lsof -ti tcp:"${PORT}" >/dev/null 2>&1; then + echo "Stopping existing server on port ${PORT}..." + lsof -ti tcp:"${PORT}" | xargs -r kill -9 + sleep 1 +fi + +PORT="${PORT}" nohup python -m uvicorn server.main:app --host 0.0.0.0 --port "${PORT}" > server.log 2>&1 & +echo "Server started (PID: $!). Logs: server.log" diff --git a/server/tts_service.py b/server/tts_service.py new file mode 100644 index 0000000..af21598 --- /dev/null +++ b/server/tts_service.py @@ -0,0 +1,285 @@ +import os +import re +import subprocess +import tempfile +from pathlib import Path +from typing import Optional, Tuple + +import pyttsx3 + +_MMS_CACHE: Optional[Tuple[object, object]] = None +_LETTER_KO = { + "A": "에이", + "B": "비", + "C": "씨", + "D": "디", + "E": "이", + "F": "에프", + "G": "지", + "H": "에이치", + "I": "아이", + "J": "제이", + "K": "케이", + "L": "엘", + "M": "엠", + "N": "엔", + "O": "오", + "P": "피", + "Q": "큐", + "R": "알", + "S": "에스", + "T": "티", + "U": "유", + "V": "브이", + "W": "더블유", + "X": "엑스", + "Y": "와이", + "Z": "지", +} +_PHRASE_MAP = [ + ("Automatic Document Feeder", "오토매틱 도큐먼트 피더"), + ("Naver Blog", "네이버 블로그"), + ("Brother Korea", "브라더 코리아"), +] +_NUM_KO = { + 0: "영", + 1: "일", + 2: "이", + 3: "삼", + 4: "사", + 5: "오", + 6: "육", + 7: "칠", + 8: "팔", + 9: "구", +} + + +def _get_mms(): + global _MMS_CACHE + if _MMS_CACHE is not None: + return _MMS_CACHE + + try: + from transformers import VitsModel, AutoTokenizer + import torch + except Exception as exc: + raise RuntimeError("MMS TTS 사용을 위해 transformers/torch 설치가 필요합니다.") from exc + + model_name = os.getenv("MMS_MODEL", "facebook/mms-tts-kor") + tokenizer = AutoTokenizer.from_pretrained(model_name) + model = VitsModel.from_pretrained(model_name) + model.eval() + + _MMS_CACHE = (model, tokenizer) + return _MMS_CACHE + + +def _text_to_wav_mms(text: str, wav_path: str) -> None: + try: + import torch + except Exception as exc: + raise RuntimeError("MMS TTS 사용을 위해 torch/numpy가 정상 설치되어야 합니다.") from exc + + try: + import soundfile as sf + except Exception as exc: + raise RuntimeError("MMS TTS 사용을 위해 soundfile 설치가 필요합니다.") from exc + + model, tokenizer = _get_mms() + + text = text.strip() + if not text: + raise RuntimeError("MMS 입력 텍스트가 비어 있습니다.") + + # 한국어 입력은 uroman 전처리가 필요할 수 있음 + try: + from uroman import uroman + + text = uroman(text) + except Exception: + pass + + inputs = tokenizer(text, return_tensors="pt") + if inputs["input_ids"].shape[1] == 0: + raise RuntimeError("MMS 토크나이저 입력이 비어 있습니다.") + with torch.no_grad(): + audio = model(**inputs).waveform.squeeze().cpu().numpy() + + sample_rate = getattr(model.config, "sampling_rate", 22050) + # MMS 출력은 float이므로 PCM16으로 저장해 왜곡을 줄입니다. + sf.write(wav_path, audio, sample_rate, subtype="PCM_16") + + +def _select_korean_voice(engine: pyttsx3.Engine, prefer_female: bool = False) -> None: + try: + voices = engine.getProperty("voices") or [] + except Exception: + return + + def _voice_info(v): + values = [] + if getattr(v, "languages", None): + values.extend(v.languages) + if getattr(v, "id", None): + values.append(v.id) + if getattr(v, "name", None): + values.append(v.name) + return " ".join(str(x) for x in values).lower() + + def _is_korean(info: str) -> bool: + return "ko" in info or "korean" in info + + def _is_female(info: str) -> bool: + return any(token in info for token in ["female", "woman", "girl", "여성", "여자"]) + + if prefer_female: + for voice in voices: + info = _voice_info(voice) + if _is_korean(info) and _is_female(info): + try: + engine.setProperty("voice", voice.id) + return + except Exception: + continue + + for voice in voices: + info = _voice_info(voice) + if _is_korean(info): + try: + engine.setProperty("voice", voice.id) + return + except Exception: + continue + + +def _spell_abbrev(match: re.Match) -> str: + return " ".join(_LETTER_KO.get(ch, ch) for ch in match.group(0)) + + +def _sino_korean(num: int) -> str: + if num == 0: + return _NUM_KO[0] + + parts = [] + if num >= 1000: + thousands = num // 1000 + if thousands > 1: + parts.append(_NUM_KO[thousands]) + parts.append("천") + num %= 1000 + if num >= 100: + hundreds = num // 100 + if hundreds > 1: + parts.append(_NUM_KO[hundreds]) + parts.append("백") + num %= 100 + if num >= 10: + tens = num // 10 + if tens > 1: + parts.append(_NUM_KO[tens]) + parts.append("십") + num %= 10 + if num > 0: + parts.append(_NUM_KO[num]) + return "".join(parts) + + +def _replace_numbers(text: str) -> str: + def _year(match: re.Match) -> str: + return f"{_sino_korean(int(match.group(1)))}년" + + def _month_day(match: re.Match) -> str: + month = _sino_korean(int(match.group(1))) + day = _sino_korean(int(match.group(2))) + return f"{month}월 {day}일" + + def _approx(match: re.Match) -> str: + return f"{_sino_korean(int(match.group(1)))}여" + + def _count(match: re.Match) -> str: + return f"{_sino_korean(int(match.group(1)))}명" + + text = re.sub(r"\b(\d{4})\s*년\b", _year, text) + text = re.sub(r"\b(\d{1,2})\s*월\s*(\d{1,2})\s*일\b", _month_day, text) + text = re.sub(r"\b(\d+)\s*여\b", _approx, text) + text = re.sub(r"\b(\d+)\s*명\b", _count, text) + return text + + +def _preprocess_text(text: str) -> str: + # 영어 약어/브랜드 발음 보정 + for src, dst in _PHRASE_MAP: + text = re.sub(rf"\b{re.escape(src)}\b", dst, text, flags=re.IGNORECASE) + text = _replace_numbers(text) + text = re.sub(r"\b[A-Z]{2,6}\b", _spell_abbrev, text) + # 괄호/구두점으로 인한 끊김을 완화 + text = text.replace("(", " ").replace(")", " ") + return text + + +def text_to_mp3(text: str, mp3_path: str, voice: Optional[str] = None) -> None: + if not text: + raise RuntimeError("텍스트가 비어 있습니다.") + + text = _preprocess_text(text) + + mp3_target = Path(mp3_path) + mp3_target.parent.mkdir(parents=True, exist_ok=True) + + tts_engine = os.getenv("TTS_ENGINE", "pyttsx3").strip().lower() + voice = (voice or "").strip().lower() or None + wav_fd, wav_path = tempfile.mkstemp(suffix=".wav") + os.close(wav_fd) + + try: + if tts_engine == "mms": + _text_to_wav_mms(text, wav_path) + audio_filter = "highpass=f=80,lowpass=f=12000" + else: + engine = pyttsx3.init() + # 음질 개선: 속도/볼륨 조정 및 한국어 음성 우선 선택 + try: + # 서버 음성이 늘어지는 현상 완화 + engine.setProperty("rate", 210) + engine.setProperty("volume", 1.0) + except Exception: + pass + _select_korean_voice(engine, prefer_female=voice == "female") + # pyttsx3로 wav 생성 후 ffmpeg로 mp3 변환 + engine.save_to_file(text, wav_path) + engine.runAndWait() + audio_filter = "loudnorm=I=-16:LRA=11:TP=-1.5,atempo=1.15" + + subprocess.run( + [ + "ffmpeg", + "-y", + "-i", + wav_path, + "-ac", + "2", + "-ar", + "44100", + "-b:a", + "192k", + "-af", + audio_filter, + str(mp3_target), + ], + check=True, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + ) + + if not mp3_target.exists(): + raise RuntimeError("mp3 파일 생성에 실패했습니다.") + except subprocess.CalledProcessError as exc: + raise RuntimeError("ffmpeg 변환에 실패했습니다.") from exc + except OSError as exc: + raise RuntimeError("파일 생성 권한 또는 경로 오류입니다.") from exc + finally: + try: + os.remove(wav_path) + except OSError: + pass