Update voice option labels

Rename voice selector labels in the client template.
Add voice selection control
2026-01-30 21:37:27 +09:00 · 2026-01-30 21:32:20 +09:00 · 2026-01-30 20:59:41 +09:00 · 2026-01-30 20:55:50 +09:00 · 2026-01-30 20:51:18 +09:00 · 2026-01-30 20:47:52 +09:00
8 changed files with 300 additions and 54 deletions
--- a/apache/tts.conf
+++ b/apache/tts.conf
@@ -1,21 +0,0 @@
 <VirtualHost *:80>
  ServerName tts.ncue.net
  Redirect permanent / https://tts.ncue.net/
 </VirtualHost>
 <VirtualHost *:443>
  ServerName tts.ncue.net
  SSLEngine on
  SSLCertificateFile /etc/letsencrypt/live/ncue.net/fullchain.pem
  SSLCertificateKeyFile /etc/letsencrypt/live/ncue.net/privkey.pem
  # 선택: 체인 파일이 필요하면 아래 경로 사용
  # SSLCertificateChainFile /etc/letsencrypt/live/ncue.net/chain.pem
  ProxyRequests Off
  ProxyPreserveHost On
  # 전체 프록시 (FastAPI가 정적/템플릿 포함 제공)
  ProxyPass / http://127.0.0.1:8019/
  ProxyPassReverse / http://127.0.0.1:8019/
 </VirtualHost>
--- a/client/static/app.js
+++ b/client/static/app.js
@@ -1,6 +1,7 @@
 const listEl = document.getElementById("tts-list");
 const textInput = document.getElementById("text-input");
 const saveBtn = document.getElementById("save-btn");
 const voiceSelect = document.getElementById("voice-select");
 const editBtn = document.getElementById("edit-btn");
 const deleteBtn = document.getElementById("delete-btn");
 const cancelBtn = document.getElementById("cancel-btn");
@@ -12,6 +13,8 @@ let items = [];
 let editMode = false;
 const selectedIds = new Set();
 let progressTimer = null;
 let selectedItemId = null;
 let selectedDownloadUrl = null;
 function startProgress() {
  let value = 0;
@@ -50,10 +53,13 @@ function finishProgress(success = true) {
 /**
  * Switch the list between normal mode and edit (multi-select) mode.
  *
  * Entering or leaving edit mode always discards the current selection:
  * the checked ids, the highlighted item and the pending download URL are
  * cleared, the download link is hidden and reset, and the list is redrawn.
  *
  * @param {boolean} isEdit - true to show the delete/cancel controls,
  *   false to show the edit button.
  */
 function setEditMode(isEdit) {
  editMode = isEdit;
  const hideEditControls = !editMode;

  // Any previous selection is meaningless once the mode changes.
  selectedIds.clear();
  selectedItemId = null;
  selectedDownloadUrl = null;

  // The edit button is visible only outside edit mode; delete/cancel only inside it.
  editBtn.classList.toggle("hidden", editMode);
  for (const control of [deleteBtn, cancelBtn]) {
    control.classList.toggle("hidden", hideEditControls);
  }

  downloadLink.classList.add("hidden");
  downloadLink.href = "#";

  renderList();
 }
@@ -62,6 +68,9 @@ function renderList() {
  items.forEach((item) => {
    const li = document.createElement("li");
    li.className = "tts-item";
    if (!editMode && selectedItemId === item.id) {
      li.classList.add("selected");
    }
    if (editMode) {
      const checkbox = document.createElement("input");
@@ -84,7 +93,9 @@ function renderList() {
    }
    const label = document.createElement("span");
-    label.textContent = item.display_time;
+    label.textContent = item.size_display
      ? `${item.display_time} (${item.size_display})`
      : item.display_time;
    label.className = "item-label";
    li.appendChild(label);
@@ -101,6 +112,7 @@ async function loadList() {
 async function handleSave() {
  const text = (textInput.value || "").trim();
  const voice = (voiceSelect?.value || "male").trim();
  if (text.length < 11) {
    alert("10개 글자 이상이어야 합니다");
    return;
@@ -110,7 +122,7 @@ async function handleSave() {
  const res = await fetch("/api/tts", {
    method: "POST",
    headers: { "Content-Type": "application/json" },
-    body: JSON.stringify({ text }),
+    body: JSON.stringify({ text, voice }),
  });
  if (!res.ok) {
@@ -137,6 +149,15 @@ async function handleItemClick(item) {
    return;
  }
  if (selectedItemId === item.id) {
    selectedItemId = null;
    selectedDownloadUrl = null;
    downloadLink.href = "#";
    downloadLink.classList.add("hidden");
    renderList();
    return;
  }
  const res = await fetch(`/api/tts/${item.id}`);
  if (!res.ok) {
    alert("항목을 불러오지 못했습니다.");
@@ -145,9 +166,11 @@ async function handleItemClick(item) {
  const data = await res.json();
  textInput.value = data.text || "";
-  downloadLink.href = data.download_url;
+  selectedItemId = item.id;
  selectedDownloadUrl = data.download_url;
  downloadLink.href = selectedDownloadUrl;
  downloadLink.classList.remove("hidden");
-  downloadLink.click();
+  renderList();
 }
 async function handleDelete() {
@@ -172,6 +195,8 @@ async function handleDelete() {
  const deletedSet = new Set(data.deleted || []);
  items = items.filter((item) => !deletedSet.has(item.id));
  textInput.value = "";
  selectedItemId = null;
  selectedDownloadUrl = null;
  downloadLink.href = "#";
  downloadLink.classList.add("hidden");
  setEditMode(false);
@@ -181,5 +206,11 @@ saveBtn.addEventListener("click", handleSave);
 editBtn.addEventListener("click", () => setEditMode(true));
 cancelBtn.addEventListener("click", () => setEditMode(false));
 deleteBtn.addEventListener("click", handleDelete);
 downloadLink.addEventListener("click", (event) => {
  if (!selectedDownloadUrl) {
    event.preventDefault();
    alert("다운로드할 항목을 선택하세요.");
  }
 });
 loadList();
--- a/client/static/styles.css
+++ b/client/static/styles.css
@@ -42,6 +42,21 @@ textarea {
  flex: 1;
 }
 /* Horizontal row that lays out its children side by side with a 12px gap,
    vertically centred (wraps the save button and the voice selector in the template). */
 .save-row {
  display: flex;
  gap: 12px;
  align-items: center;
 }
 /* Appearance of the voice <select>: light grey 1px border, rounded corners,
    white background and dark grey 14px text. */
 .voice-select {
  border: 1px solid #c9c9c9;
  border-radius: 4px;
  padding: 10px 12px;
  font-size: 14px;
  background: #ffffff;
  color: #333;
 }
 button {
  border: none;
  padding: 12px 18px;
@@ -101,6 +116,12 @@ button.danger {
  background: #f1f6f9;
 }
 /* Highlight for the list item carrying the "selected" class (added by the
    list renderer outside edit mode): light background plus a 4px left marker. */
 .tts-item.selected {
  background: #f7f7f7;
  border-left: 4px solid #e2e2e2;
  padding-left: 2px;
 }
 .bullet {
  font-size: 18px;
  color: #555;
--- a/client/templates/index.html
+++ b/client/templates/index.html
@@ -3,7 +3,7 @@
  <head>
    <meta charset="UTF-8" />
    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
-    <title>TTS 저장/조회/삭제</title>
+    <title>글소리 (TTS, 텍스트→MP3변환)</title>
    <link rel="stylesheet" href="/static/styles.css" />
  </head>
  <body>
@@ -11,7 +11,13 @@
      <section class="panel left">
        <div class="panel-header">입력 텍스트</div>
        <textarea id="text-input" rows="16" placeholder="텍스트를 입력하세요"></textarea>
-        <button id="save-btn" class="primary">mp3 저장</button>
+        <div class="save-row">
          <button id="save-btn" class="primary">mp3 변환</button>
          <select id="voice-select" class="voice-select" aria-label="음성 선택">
            <option value="male">음성 #1</option>
            <option value="female">음성 #2</option>
          </select>
        </div>
        <div id="save-progress" class="progress-wrap hidden" aria-label="저장 진행률">
          <div
            id="save-progress-bar"
--- a/requirements.txt
+++ b/requirements.txt
@@ -7,3 +7,4 @@ jinja2
 torch
 transformers
 soundfile
 uroman
--- a/server/db.py
+++ b/server/db.py
@@ -29,10 +29,17 @@ def init_db():
                    id SERIAL PRIMARY KEY,
                    text TEXT NOT NULL,
                    filename TEXT,
                    size_bytes BIGINT,
                    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
                );
                """
            )
            cur.execute(
                """
                ALTER TABLE tts_items
                ADD COLUMN IF NOT EXISTS size_bytes BIGINT;
                """
            )
            cur.execute(
                """
                CREATE INDEX IF NOT EXISTS tts_items_created_at_idx
@@ -72,12 +79,26 @@ def update_filename(tts_id: int, filename: str) -> None:
        conn.commit()
 def update_size_bytes(tts_id: int, size_bytes: int) -> None:
    """Store ``size_bytes`` on the ``tts_items`` row whose id is ``tts_id``.

    Runs a single parameterized UPDATE and commits it on the same connection.
    """
    statement = """
                UPDATE tts_items
                SET size_bytes = %s
                WHERE id = %s;
                """
    params = (size_bytes, tts_id)
    with get_conn() as conn:
        with conn.cursor() as cur:
            cur.execute(statement, params)
        conn.commit()
 def list_items() -> List[Dict[str, Any]]:
    with get_conn() as conn:
        with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur:
            cur.execute(
                """
-                SELECT id, created_at, filename
+                SELECT id, created_at, filename, size_bytes
                FROM tts_items
                ORDER BY created_at DESC;
                """
@@ -91,7 +112,7 @@ def get_item(tts_id: int) -> Optional[Dict[str, Any]]:
        with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur:
            cur.execute(
                """
-                SELECT id, text, filename, created_at
+                SELECT id, text, filename, size_bytes, created_at
                FROM tts_items
                WHERE id = %s;
                """,
--- a/server/main.py
+++ b/server/main.py
@@ -15,6 +15,7 @@ from .db import (
    init_db,
    create_item,
    update_filename,
    update_size_bytes,
    list_items,
    get_item,
    delete_items,
@@ -47,6 +48,7 @@ templates = Jinja2Templates(directory=str(CLIENT_DIR / "templates"))
 class TtsCreateRequest(BaseModel):
    text: str
    voice: str | None = None
 class TtsDeleteRequest(BaseModel):
@@ -64,6 +66,29 @@ def ensure_resources_dir():
    RESOURCES_DIR.mkdir(parents=True, exist_ok=True)
 def format_size(bytes_size: int) -> str:
    """Format a byte count as a compact label.

    Values below 1024 are shown as whole bytes (``"512B"``); larger values are
    shown with one decimal place in ``KB`` (below 1024 * 1024) or ``MB``.
    """
    kib = 1024
    mib = kib * kib
    if bytes_size >= mib:
        return f"{bytes_size / mib:.1f}MB"
    if bytes_size >= kib:
        return f"{bytes_size / kib:.1f}KB"
    return f"{bytes_size}B"
 def get_file_size_display(size_bytes: int | None) -> str | None:
    """Return the human-readable size label for ``size_bytes``, or ``None`` when the size is unknown."""
    return None if size_bytes is None else format_size(size_bytes)
 def get_file_size_bytes(filename: str | None) -> int | None:
    """Look up the on-disk size of ``filename`` inside ``RESOURCES_DIR``.

    Returns ``None`` when no filename is given or the file does not exist;
    otherwise returns the size in bytes reported by ``stat()``.
    """
    if filename:
        candidate = RESOURCES_DIR / filename
        if candidate.exists():
            return candidate.stat().st_size
    return None
@app.on_event("startup")
 def on_startup():
    ensure_resources_dir()
@@ -78,20 +103,30 @@ def index(request: Request):
@app.get("/api/tts")
 def api_list_tts():
    rows = list_items()
-    return [
+    payload = []
-        {
+    for row in rows:
-            "id": row["id"],
+        size_bytes = row.get("size_bytes")
-            "created_at": row["created_at"].isoformat(),
+        if size_bytes is None and row.get("filename"):
-            "display_time": format_display_time(row["created_at"]),
+            computed = get_file_size_bytes(row["filename"])
-            "filename": row["filename"],
+            if computed is not None:
-        }
+                update_size_bytes(row["id"], computed)
-        for row in rows
+                size_bytes = computed
-    ]
+        payload.append(
            {
                "id": row["id"],
                "created_at": row["created_at"].isoformat(),
                "display_time": format_display_time(row["created_at"]),
                "filename": row["filename"],
                "size_display": get_file_size_display(size_bytes),
            }
        )
    return payload
@app.post("/api/tts")
 def api_create_tts(payload: TtsCreateRequest):
    text = (payload.text or "").strip()
    voice = (payload.voice or "").strip().lower()
    if len(text) < 11:
        raise HTTPException(status_code=400, detail="텍스트는 11글자 이상이어야 합니다.")
@@ -104,19 +139,23 @@ def api_create_tts(payload: TtsCreateRequest):
    mp3_path = RESOURCES_DIR / filename
    try:
-        text_to_mp3(text=text, mp3_path=str(mp3_path))
+        text_to_mp3(text=text, mp3_path=str(mp3_path), voice=voice)
    except Exception as exc:
        logger.exception("TTS 생성 실패")
        delete_item_by_id(tts_id)
        raise HTTPException(status_code=500, detail=str(exc)) from exc
    size_bytes = get_file_size_bytes(filename)
    update_filename(tts_id, filename)
    if size_bytes is not None:
        update_size_bytes(tts_id, size_bytes)
    return {
        "id": tts_id,
        "created_at": created_at.isoformat(),
        "display_time": format_display_time(created_at),
        "filename": filename,
        "size_display": get_file_size_display(size_bytes),
    }
--- a/server/tts_service.py
+++ b/server/tts_service.py
@@ -1,4 +1,5 @@
 import os
 import re
 import subprocess
 import tempfile
 from pathlib import Path
@@ -7,6 +8,51 @@ from typing import Optional, Tuple
 import pyttsx3
 _MMS_CACHE: Optional[Tuple[object, object]] = None
 # Korean reading for each uppercase Latin letter; used to spell out
 # abbreviations letter by letter before synthesis.
 # NOTE(review): "G" and "Z" both map to "지" — confirm whether "Z" should be
 # read differently (e.g. "제트") so the two letters stay distinguishable.
 _LETTER_KO = {
    "A": "에이",
    "B": "비",
    "C": "씨",
    "D": "디",
    "E": "이",
    "F": "에프",
    "G": "지",
    "H": "에이치",
    "I": "아이",
    "J": "제이",
    "K": "케이",
    "L": "엘",
    "M": "엠",
    "N": "엔",
    "O": "오",
    "P": "피",
    "Q": "큐",
    "R": "알",
    "S": "에스",
    "T": "티",
    "U": "유",
    "V": "브이",
    "W": "더블유",
    "X": "엑스",
    "Y": "와이",
    "Z": "지",
 }
 # (English phrase, Korean transliteration) pairs; text preprocessing replaces
 # each phrase case-insensitively with its Korean pronunciation.
 _PHRASE_MAP = [
    ("Automatic Document Feeder", "오토매틱 도큐먼트 피더"),
    ("Naver Blog", "네이버 블로그"),
    ("Brother Korea", "브라더 코리아"),
 ]
 # Sino-Korean reading of each decimal digit (0-9), used when spelling numbers.
 _NUM_KO = {
    0: "영",
    1: "일",
    2: "이",
    3: "삼",
    4: "사",
    5: "오",
    6: "육",
    7: "칠",
    8: "팔",
    9: "구",
 }
 def _get_mms():
@@ -41,31 +87,65 @@ def _text_to_wav_mms(text: str, wav_path: str) -> None:
        raise RuntimeError("MMS TTS 사용을 위해 soundfile 설치가 필요합니다.") from exc
    model, tokenizer = _get_mms()
    text = text.strip()
    if not text:
        raise RuntimeError("MMS 입력 텍스트가 비어 있습니다.")
    # 한국어 입력은 uroman 전처리가 필요할 수 있음
    try:
        from uroman import uroman
        text = uroman(text)
    except Exception:
        pass
    inputs = tokenizer(text, return_tensors="pt")
    if inputs["input_ids"].shape[1] == 0:
        raise RuntimeError("MMS 토크나이저 입력이 비어 있습니다.")
    with torch.no_grad():
        audio = model(**inputs).waveform.squeeze().cpu().numpy()
    sample_rate = getattr(model.config, "sampling_rate", 22050)
-    sf.write(wav_path, audio, sample_rate)
+    # MMS 출력은 float이므로 PCM16으로 저장해 왜곡을 줄입니다.
    sf.write(wav_path, audio, sample_rate, subtype="PCM_16")
-def _select_korean_voice(engine: pyttsx3.Engine) -> None:
+def _select_korean_voice(engine: pyttsx3.Engine, prefer_female: bool = False) -> None:
    try:
        voices = engine.getProperty("voices") or []
    except Exception:
        return
-    for voice in voices:
+    def _voice_info(v):
-        lang_values = []
+        values = []
-        if getattr(voice, "languages", None):
+        if getattr(v, "languages", None):
-            lang_values.extend(voice.languages)
+            values.extend(v.languages)
-        if getattr(voice, "id", None):
+        if getattr(v, "id", None):
-            lang_values.append(voice.id)
+            values.append(v.id)
-        if getattr(voice, "name", None):
+        if getattr(v, "name", None):
-            lang_values.append(voice.name)
+            values.append(v.name)
        return " ".join(str(x) for x in values).lower()
-        joined = " ".join(str(v) for v in lang_values).lower()
+    def _is_korean(info: str) -> bool:
-        if "ko" in joined or "korean" in joined:
+        return "ko" in info or "korean" in info
    def _is_female(info: str) -> bool:
        return any(token in info for token in ["female", "woman", "girl", "여성", "여자"])
    if prefer_female:
        for voice in voices:
            info = _voice_info(voice)
            if _is_korean(info) and _is_female(info):
                try:
                    engine.setProperty("voice", voice.id)
                    return
                except Exception:
                    continue
    for voice in voices:
        info = _voice_info(voice)
        if _is_korean(info):
            try:
                engine.setProperty("voice", voice.id)
                return
@@ -73,21 +153,89 @@ def _select_korean_voice(engine: pyttsx3.Engine) -> None:
                continue
-def text_to_mp3(text: str, mp3_path: str) -> None:
+def _spell_abbrev(match: re.Match) -> str:
    return " ".join(_LETTER_KO.get(ch, ch) for ch in match.group(0))
 def _sino_korean(num: int) -> str:
    """Spell a non-negative integer with Sino-Korean numerals.

    The number is read in groups of four digits (만, 억, 조, 경), so values of
    10,000 and above are spelled instead of raising ``KeyError``. Values too
    large for the named groups are read digit by digit. Results for 0..9999
    are unchanged; negative input yields an empty string, as before.
    """
    if num == 0:
        return _NUM_KO[0]
    if num < 0:
        return ""

    def _group(value: int) -> str:
        # Reads 1..9999; a leading "일" is dropped before 천/백/십 (1000 -> "천").
        pieces = []
        for unit, name in ((1000, "천"), (100, "백"), (10, "십")):
            digit, value = divmod(value, unit)
            if digit == 0:
                continue
            if digit > 1:
                pieces.append(_NUM_KO[digit])
            pieces.append(name)
        if value > 0:
            pieces.append(_NUM_KO[value])
        return "".join(pieces)

    big_units = ("", "만", "억", "조", "경")
    if num >= 10000 ** len(big_units):
        # No named group left: fall back to reading the digits one by one.
        return "".join(_NUM_KO[int(digit)] for digit in str(num))

    groups = []
    for unit in big_units:
        num, chunk = divmod(num, 10000)
        if chunk:
            # A leading 10,000 is read "만" rather than "일만"; every other
            # group keeps its digit reading (일억, 일억 일만, ...).
            leading_man = chunk == 1 and unit == "만" and num == 0
            groups.append(("" if leading_man else _group(chunk)) + unit)
        if num == 0:
            break
    return "".join(reversed(groups))
 def _replace_numbers(text: str) -> str:
    """Rewrite digits in common Korean date/count expressions as Korean readings.

    Handles four-digit years (``2026년``), month/day pairs (``1월 30일``),
    approximate amounts (``100여``) and head counts (``30명``).

    A counter written directly after the digits is accepted whatever follows
    it, so forms with attached particles such as ``2026년에`` or ``30명이`` are
    converted. A counter separated from the digits by whitespace must still
    end at a word boundary, so ``30 명령`` is left untouched. Digits preceded
    by another digit, an ASCII letter or an underscore are never matched.
    """
    def _year(match: re.Match) -> str:
        return f"{_sino_korean(int(match.group(1)))}년"
    def _month_day(match: re.Match) -> str:
        month_num = int(match.group(1))
        # 6월 and 10월 are read "유월" / "시월", not "육월" / "십월".
        month = {6: "유", 10: "시"}.get(month_num) or _sino_korean(month_num)
        day = _sino_korean(int(match.group(2)))
        return f"{month}월 {day}일"
    def _approx(match: re.Match) -> str:
        return f"{_sino_korean(int(match.group(1)))}여"
    def _count(match: re.Match) -> str:
        return f"{_sino_korean(int(match.group(1)))}명"

    # Hangul counts as a word character, so a plain ``\b`` never matches
    # between a counter and an attached particle; use explicit guards instead.
    lead = r"(?<![\dA-Za-z_])"

    def _tail(counter: str) -> str:
        return rf"(?:{counter}|\s+{counter}\b)"

    year_tail = _tail("년")
    day_tail = _tail("일")
    approx_tail = _tail("여")
    count_tail = _tail("명")

    text = re.sub(rf"{lead}(\d{{4}}){year_tail}", _year, text)
    text = re.sub(rf"{lead}(\d{{1,2}})\s*월\s*(\d{{1,2}}){day_tail}", _month_day, text)
    text = re.sub(rf"{lead}(\d+){approx_tail}", _approx, text)
    text = re.sub(rf"{lead}(\d+){count_tail}", _count, text)
    return text
 def _preprocess_text(text: str) -> str:
    """Normalize input text before speech synthesis.

    Steps, in order: replace known English phrases with their Korean
    transliteration, spell out supported numeric expressions, spell 2-6 letter
    uppercase abbreviations letter by letter, and replace parentheses with
    spaces.

    Word boundaries are evaluated in ASCII mode. Hangul counts as a word
    character in the default Unicode mode, so ``\b`` would not match between an
    English term and an attached Korean particle ("AI가", "Naver Blog에") and
    those terms would be left unconverted.
    """
    # Correct the pronunciation of English abbreviations and brand names.
    for src, dst in _PHRASE_MAP:
        text = re.sub(
            rf"\b{re.escape(src)}\b",
            dst,
            text,
            flags=re.IGNORECASE | re.ASCII,
        )
    text = _replace_numbers(text)
    text = re.sub(r"\b[A-Z]{2,6}\b", _spell_abbrev, text, flags=re.ASCII)
    # Soften the breaks that parentheses/punctuation cause in the audio.
    text = text.replace("(", " ").replace(")", " ")
    return text
 def text_to_mp3(text: str, mp3_path: str, voice: Optional[str] = None) -> None:
    if not text:
        raise RuntimeError("텍스트가 비어 있습니다.")
    text = _preprocess_text(text)
    mp3_target = Path(mp3_path)
    mp3_target.parent.mkdir(parents=True, exist_ok=True)
    tts_engine = os.getenv("TTS_ENGINE", "pyttsx3").strip().lower()
    voice = (voice or "").strip().lower() or None
    wav_fd, wav_path = tempfile.mkstemp(suffix=".wav")
    os.close(wav_fd)
    try:
        if tts_engine == "mms":
            _text_to_wav_mms(text, wav_path)
-            audio_filter = "loudnorm=I=-16:LRA=11:TP=-1.5"
+            audio_filter = "highpass=f=80,lowpass=f=12000"
        else:
            engine = pyttsx3.init()
            # 음질 개선: 속도/볼륨 조정 및 한국어 음성 우선 선택
@@ -97,7 +245,7 @@ def text_to_mp3(text: str, mp3_path: str) -> None:
                engine.setProperty("volume", 1.0)
            except Exception:
                pass
-            _select_korean_voice(engine)
+            _select_korean_voice(engine, prefer_female=voice == "female")
            # pyttsx3로 wav 생성 후 ffmpeg로 mp3 변환
            engine.save_to_file(text, wav_path)
            engine.runAndWait()
Author	SHA1	Message	Date
dsyoon	de9015c00d	Update voice option labels Rename voice selector labels in the client template.	2026-01-30 21:37:27 +09:00
dsyoon	21a29a6c8a	Add voice selection control Expose a voice selector next to the save button and pass the choice to TTS so pyttsx3 can prefer a female voice.	2026-01-30 21:32:20 +09:00
dsyoon	ebd6a574d4	Update page title Refresh the HTML title text for the client page.	2026-01-30 20:59:41 +09:00
dsyoon	0601488087	Make list selection explicit Stop auto-downloading on list click and highlight the selected item; download only when the button is pressed.	2026-01-30 20:55:50 +09:00
dsyoon	1d92f2f4fa	Store mp3 size in database Persist file size at creation and backfill missing sizes on list responses so the UI can display sizes reliably.	2026-01-30 20:51:18 +09:00
dsyoon	7ee89d0629	Show file sizes in list Include mp3 size display in API responses and render it in the list; remove obsolete Apache config.	2026-01-30 20:47:52 +09:00
dsyoon	e240153e3f	Handle Korean numeric dates in TTS Convert common date and count numerals to Korean readings so MMS outputs month/day and attendee counts correctly.	2026-01-30 20:42:36 +09:00
dsyoon	35dae473ec	Apply uroman preprocessing for MMS Romanize Korean input for MMS, guard empty tokens, and add uroman dependency.	2026-01-30 20:32:24 +09:00
dsyoon	9b3a743c52	Reduce MMS audio distortion Write MMS wav output as PCM16, simplify filters, and normalize punctuation to avoid garbled speech.	2026-01-30 20:24:44 +09:00
dsyoon	8cdfa1bf4f	Normalize English terms for Korean TTS Preprocess text to spell abbreviations and map common English phrases to Korean pronunciation.	2026-01-30 20:17:17 +09:00