Update voice option labels

Rename voice selector labels in the client template.
Add voice selection control
2026-01-30 21:37:27 +09:00 · 2026-01-30 21:32:20 +09:00 · 2026-01-30 20:59:41 +09:00 · 2026-01-30 20:55:50 +09:00 · 2026-01-30 20:51:18 +09:00 · 2026-01-30 20:47:52 +09:00
8 changed files with 300 additions and 54 deletions
--- a/apache/tts.conf
+++ b/apache/tts.conf
@@ -1,21 +0,0 @@
-<VirtualHost *:80>
-  ServerName tts.ncue.net
-  Redirect permanent / https://tts.ncue.net/
-</VirtualHost>
-
-<VirtualHost *:443>
-  ServerName tts.ncue.net
-
-  SSLEngine on
-  SSLCertificateFile /etc/letsencrypt/live/ncue.net/fullchain.pem
-  SSLCertificateKeyFile /etc/letsencrypt/live/ncue.net/privkey.pem
-  # 선택: 체인 파일이 필요하면 아래 경로 사용
-  # SSLCertificateChainFile /etc/letsencrypt/live/ncue.net/chain.pem
-
-  ProxyRequests Off
-  ProxyPreserveHost On
-
-  # 전체 프록시 (FastAPI가 정적/템플릿 포함 제공)
-  ProxyPass / http://127.0.0.1:8019/
-  ProxyPassReverse / http://127.0.0.1:8019/
-</VirtualHost>
--- a/client/static/app.js
+++ b/client/static/app.js
@@ -1,6 +1,7 @@
 const listEl = document.getElementById("tts-list");
 const textInput = document.getElementById("text-input");
 const saveBtn = document.getElementById("save-btn");
+const voiceSelect = document.getElementById("voice-select");
 const editBtn = document.getElementById("edit-btn");
 const deleteBtn = document.getElementById("delete-btn");
 const cancelBtn = document.getElementById("cancel-btn");
@@ -12,6 +13,8 @@ let items = [];
 let editMode = false;
 const selectedIds = new Set();
 let progressTimer = null;
+let selectedItemId = null;
+let selectedDownloadUrl = null;

 function startProgress() {
  let value = 0;
@@ -50,10 +53,13 @@ function finishProgress(success = true) {
 function setEditMode(isEdit) {
  editMode = isEdit;
  selectedIds.clear();
+  selectedItemId = null;
+  selectedDownloadUrl = null;
  editBtn.classList.toggle("hidden", editMode);
  deleteBtn.classList.toggle("hidden", !editMode);
  cancelBtn.classList.toggle("hidden", !editMode);
  downloadLink.classList.add("hidden");
+  downloadLink.href = "#";
  renderList();
 }

@@ -62,6 +68,9 @@ function renderList() {
  items.forEach((item) => {
    const li = document.createElement("li");
    li.className = "tts-item";
+    if (!editMode && selectedItemId === item.id) {
+      li.classList.add("selected");
+    }

    if (editMode) {
      const checkbox = document.createElement("input");
@@ -84,7 +93,9 @@ function renderList() {
    }

    const label = document.createElement("span");
-    label.textContent = item.display_time;
+    label.textContent = item.size_display
+      ? `${item.display_time} (${item.size_display})`
+      : item.display_time;
    label.className = "item-label";
    li.appendChild(label);

@@ -101,6 +112,7 @@ async function loadList() {

 async function handleSave() {
  const text = (textInput.value || "").trim();
+  const voice = (voiceSelect?.value || "male").trim();
  if (text.length < 11) {
    alert("10개 글자 이상이어야 합니다");
    return;
@@ -110,7 +122,7 @@ async function handleSave() {
  const res = await fetch("/api/tts", {
    method: "POST",
    headers: { "Content-Type": "application/json" },
-    body: JSON.stringify({ text }),
+    body: JSON.stringify({ text, voice }),
  });

  if (!res.ok) {
@@ -137,6 +149,15 @@ async function handleItemClick(item) {
    return;
  }

+  if (selectedItemId === item.id) {
+    selectedItemId = null;
+    selectedDownloadUrl = null;
+    downloadLink.href = "#";
+    downloadLink.classList.add("hidden");
+    renderList();
+    return;
+  }
+
  const res = await fetch(`/api/tts/${item.id}`);
  if (!res.ok) {
    alert("항목을 불러오지 못했습니다.");
@@ -145,9 +166,11 @@ async function handleItemClick(item) {

  const data = await res.json();
  textInput.value = data.text || "";
-  downloadLink.href = data.download_url;
+  selectedItemId = item.id;
+  selectedDownloadUrl = data.download_url;
+  downloadLink.href = selectedDownloadUrl;
  downloadLink.classList.remove("hidden");
-  downloadLink.click();
+  renderList();
 }

 async function handleDelete() {
@@ -172,6 +195,8 @@ async function handleDelete() {
  const deletedSet = new Set(data.deleted || []);
  items = items.filter((item) => !deletedSet.has(item.id));
  textInput.value = "";
+  selectedItemId = null;
+  selectedDownloadUrl = null;
  downloadLink.href = "#";
  downloadLink.classList.add("hidden");
  setEditMode(false);
@@ -181,5 +206,11 @@ saveBtn.addEventListener("click", handleSave);
 editBtn.addEventListener("click", () => setEditMode(true));
 cancelBtn.addEventListener("click", () => setEditMode(false));
 deleteBtn.addEventListener("click", handleDelete);
+downloadLink.addEventListener("click", (event) => {
+  if (!selectedDownloadUrl) {
+    event.preventDefault();
+    alert("다운로드할 항목을 선택하세요.");
+  }
+});

 loadList();
--- a/client/static/styles.css
+++ b/client/static/styles.css
@@ -42,6 +42,21 @@ textarea {
  flex: 1;
 }

+.save-row {
+  display: flex;
+  gap: 12px;
+  align-items: center;
+}
+
+.voice-select {
+  border: 1px solid #c9c9c9;
+  border-radius: 4px;
+  padding: 10px 12px;
+  font-size: 14px;
+  background: #ffffff;
+  color: #333;
+}
+
 button {
  border: none;
  padding: 12px 18px;
@@ -101,6 +116,12 @@ button.danger {
  background: #f1f6f9;
 }

+.tts-item.selected {
+  background: #f7f7f7;
+  border-left: 4px solid #e2e2e2;
+  padding-left: 2px;
+}
+
 .bullet {
  font-size: 18px;
  color: #555;
--- a/client/templates/index.html
+++ b/client/templates/index.html
@@ -3,7 +3,7 @@
  <head>
    <meta charset="UTF-8" />
    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
-    <title>TTS 저장/조회/삭제</title>
+    <title>글소리 (TTS, 텍스트→MP3변환)</title>
    <link rel="stylesheet" href="/static/styles.css" />
  </head>
  <body>
@@ -11,7 +11,13 @@
      <section class="panel left">
        <div class="panel-header">입력 텍스트</div>
        <textarea id="text-input" rows="16" placeholder="텍스트를 입력하세요"></textarea>
-        <button id="save-btn" class="primary">mp3 저장</button>
+        <div class="save-row">
+          <button id="save-btn" class="primary">mp3 변환</button>
+          <select id="voice-select" class="voice-select" aria-label="음성 선택">
+            <option value="male">음성 #1</option>
+            <option value="female">음성 #2</option>
+          </select>
+        </div>
        <div id="save-progress" class="progress-wrap hidden" aria-label="저장 진행률">
          <div
            id="save-progress-bar"
--- a/requirements.txt
+++ b/requirements.txt
@@ -7,3 +7,4 @@ jinja2
 torch
 transformers
 soundfile
+uroman
--- a/server/db.py
+++ b/server/db.py
@@ -29,10 +29,17 @@ def init_db():
                    id SERIAL PRIMARY KEY,
                    text TEXT NOT NULL,
                    filename TEXT,
+                    size_bytes BIGINT,
                    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
                );
                """
            )
+            cur.execute(
+                """
+                ALTER TABLE tts_items
+                ADD COLUMN IF NOT EXISTS size_bytes BIGINT;
+                """
+            )
            cur.execute(
                """
                CREATE INDEX IF NOT EXISTS tts_items_created_at_idx
@@ -72,12 +79,26 @@ def update_filename(tts_id: int, filename: str) -> None:
        conn.commit()


+def update_size_bytes(tts_id: int, size_bytes: int) -> None:
+    with get_conn() as conn:
+        with conn.cursor() as cur:
+            cur.execute(
+                """
+                UPDATE tts_items
+                SET size_bytes = %s
+                WHERE id = %s;
+                """,
+                (size_bytes, tts_id),
+            )
+        conn.commit()
+
+
 def list_items() -> List[Dict[str, Any]]:
    with get_conn() as conn:
        with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur:
            cur.execute(
                """
-                SELECT id, created_at, filename
+                SELECT id, created_at, filename, size_bytes
                FROM tts_items
                ORDER BY created_at DESC;
                """
@@ -91,7 +112,7 @@ def get_item(tts_id: int) -> Optional[Dict[str, Any]]:
        with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur:
            cur.execute(
                """
-                SELECT id, text, filename, created_at
+                SELECT id, text, filename, size_bytes, created_at
                FROM tts_items
                WHERE id = %s;
                """,
--- a/server/main.py
+++ b/server/main.py
@@ -15,6 +15,7 @@ from .db import (
    init_db,
    create_item,
    update_filename,
+    update_size_bytes,
    list_items,
    get_item,
    delete_items,
@@ -47,6 +48,7 @@ templates = Jinja2Templates(directory=str(CLIENT_DIR / "templates"))

 class TtsCreateRequest(BaseModel):
    text: str
+    voice: str | None = None


 class TtsDeleteRequest(BaseModel):
@@ -64,6 +66,29 @@ def ensure_resources_dir():
    RESOURCES_DIR.mkdir(parents=True, exist_ok=True)


+def format_size(bytes_size: int) -> str:
+    """Format a byte count as a short human-readable string.
+
+    Values below 1024 are shown as whole bytes ("512B"); larger values are
+    shown with one decimal place in 1024-based KB or MB ("1.5KB", "2.3MB").
+    """
+    if bytes_size < 1024:
+        return f"{bytes_size}B"
+    if bytes_size < 1024 * 1024:
+        kb_text = f"{bytes_size / 1024:.1f}"
+        # Sizes just under 1 MiB round up to "1024.0"; fall through so they
+        # are reported as "1.0MB" instead of "1024.0KB".
+        if kb_text != "1024.0":
+            return f"{kb_text}KB"
+    return f"{bytes_size / (1024 * 1024):.1f}MB"
+
+
+def get_file_size_display(size_bytes: int | None) -> str | None:
+    if size_bytes is None:
+        return None
+    return format_size(size_bytes)
+
+
+def get_file_size_bytes(filename: str | None) -> int | None:
+    if not filename:
+        return None
+    file_path = RESOURCES_DIR / filename
+    if not file_path.exists():
+        return None
+    return file_path.stat().st_size
+
+
@app.on_event("startup")
 def on_startup():
    ensure_resources_dir()
@@ -78,20 +103,30 @@ def index(request: Request):
@app.get("/api/tts")
 def api_list_tts():
    rows = list_items()
-    return [
-        {
-            "id": row["id"],
-            "created_at": row["created_at"].isoformat(),
-            "display_time": format_display_time(row["created_at"]),
-            "filename": row["filename"],
-        }
-        for row in rows
-    ]
+    payload = []
+    for row in rows:
+        size_bytes = row.get("size_bytes")
+        if size_bytes is None and row.get("filename"):
+            computed = get_file_size_bytes(row["filename"])
+            if computed is not None:
+                update_size_bytes(row["id"], computed)
+                size_bytes = computed
+        payload.append(
+            {
+                "id": row["id"],
+                "created_at": row["created_at"].isoformat(),
+                "display_time": format_display_time(row["created_at"]),
+                "filename": row["filename"],
+                "size_display": get_file_size_display(size_bytes),
+            }
+        )
+    return payload


@app.post("/api/tts")
 def api_create_tts(payload: TtsCreateRequest):
    text = (payload.text or "").strip()
+    voice = (payload.voice or "").strip().lower()
    if len(text) < 11:
        raise HTTPException(status_code=400, detail="텍스트는 11글자 이상이어야 합니다.")

@@ -104,19 +139,23 @@ def api_create_tts(payload: TtsCreateRequest):
    mp3_path = RESOURCES_DIR / filename

    try:
-        text_to_mp3(text=text, mp3_path=str(mp3_path))
+        text_to_mp3(text=text, mp3_path=str(mp3_path), voice=voice)
    except Exception as exc:
        logger.exception("TTS 생성 실패")
        delete_item_by_id(tts_id)
        raise HTTPException(status_code=500, detail=str(exc)) from exc

+    size_bytes = get_file_size_bytes(filename)
    update_filename(tts_id, filename)
+    if size_bytes is not None:
+        update_size_bytes(tts_id, size_bytes)

    return {
        "id": tts_id,
        "created_at": created_at.isoformat(),
        "display_time": format_display_time(created_at),
        "filename": filename,
+        "size_display": get_file_size_display(size_bytes),
    }


--- a/server/tts_service.py
+++ b/server/tts_service.py
@@ -1,4 +1,5 @@
 import os
+import re
 import subprocess
 import tempfile
 from pathlib import Path
@@ -7,6 +8,51 @@ from typing import Optional, Tuple
 import pyttsx3

 _MMS_CACHE: Optional[Tuple[object, object]] = None
+_LETTER_KO = {
+    "A": "에이",
+    "B": "비",
+    "C": "씨",
+    "D": "디",
+    "E": "이",
+    "F": "에프",
+    "G": "지",
+    "H": "에이치",
+    "I": "아이",
+    "J": "제이",
+    "K": "케이",
+    "L": "엘",
+    "M": "엠",
+    "N": "엔",
+    "O": "오",
+    "P": "피",
+    "Q": "큐",
+    "R": "알",
+    "S": "에스",
+    "T": "티",
+    "U": "유",
+    "V": "브이",
+    "W": "더블유",
+    "X": "엑스",
+    "Y": "와이",
+    "Z": "지",
+}
+_PHRASE_MAP = [
+    ("Automatic Document Feeder", "오토매틱 도큐먼트 피더"),
+    ("Naver Blog", "네이버 블로그"),
+    ("Brother Korea", "브라더 코리아"),
+]
+_NUM_KO = {
+    0: "영",
+    1: "일",
+    2: "이",
+    3: "삼",
+    4: "사",
+    5: "오",
+    6: "육",
+    7: "칠",
+    8: "팔",
+    9: "구",
+}


 def _get_mms():
@@ -41,31 +87,65 @@ def _text_to_wav_mms(text: str, wav_path: str) -> None:
        raise RuntimeError("MMS TTS 사용을 위해 soundfile 설치가 필요합니다.") from exc

    model, tokenizer = _get_mms()
+
+    text = text.strip()
+    if not text:
+        raise RuntimeError("MMS 입력 텍스트가 비어 있습니다.")
+
+    # 한국어 입력은 uroman 전처리가 필요할 수 있음
+    try:
+        from uroman import uroman
+
+        text = uroman(text)
+    except Exception:
+        pass
+
    inputs = tokenizer(text, return_tensors="pt")
+    if inputs["input_ids"].shape[1] == 0:
+        raise RuntimeError("MMS 토크나이저 입력이 비어 있습니다.")
    with torch.no_grad():
        audio = model(**inputs).waveform.squeeze().cpu().numpy()

    sample_rate = getattr(model.config, "sampling_rate", 22050)
-    sf.write(wav_path, audio, sample_rate)
+    # MMS 출력은 float이므로 PCM16으로 저장해 왜곡을 줄입니다.
+    sf.write(wav_path, audio, sample_rate, subtype="PCM_16")


-def _select_korean_voice(engine: pyttsx3.Engine) -> None:
+def _select_korean_voice(engine: pyttsx3.Engine, prefer_female: bool = False) -> None:
    try:
        voices = engine.getProperty("voices") or []
    except Exception:
        return

-    for voice in voices:
-        lang_values = []
-        if getattr(voice, "languages", None):
-            lang_values.extend(voice.languages)
-        if getattr(voice, "id", None):
-            lang_values.append(voice.id)
-        if getattr(voice, "name", None):
-            lang_values.append(voice.name)
+    def _voice_info(v):
+        values = []
+        if getattr(v, "languages", None):
+            values.extend(v.languages)
+        if getattr(v, "id", None):
+            values.append(v.id)
+        if getattr(v, "name", None):
+            values.append(v.name)
+        return " ".join(str(x) for x in values).lower()

-        joined = " ".join(str(v) for v in lang_values).lower()
-        if "ko" in joined or "korean" in joined:
+    def _is_korean(info: str) -> bool:
+        return "ko" in info or "korean" in info
+
+    def _is_female(info: str) -> bool:
+        return any(token in info for token in ["female", "woman", "girl", "여성", "여자"])
+
+    if prefer_female:
+        for voice in voices:
+            info = _voice_info(voice)
+            if _is_korean(info) and _is_female(info):
+                try:
+                    engine.setProperty("voice", voice.id)
+                    return
+                except Exception:
+                    continue
+
+    for voice in voices:
+        info = _voice_info(voice)
+        if _is_korean(info):
            try:
                engine.setProperty("voice", voice.id)
                return
@@ -73,21 +153,89 @@ def _select_korean_voice(engine: pyttsx3.Engine) -> None:
                continue


-def text_to_mp3(text: str, mp3_path: str) -> None:
+def _spell_abbrev(match: re.Match) -> str:
+    return " ".join(_LETTER_KO.get(ch, ch) for ch in match.group(0))
+
+
+def _sino_korean(num: int) -> str:
+    """Return the Sino-Korean reading of a non-negative integer.
+
+    Numbers are read in four-digit groups joined with 만/억/조/경, so values
+    of 10000 and above no longer raise KeyError. A leading "일" is omitted
+    before 십/백/천 and before 만 (10000 -> "만"). Values too large for the
+    unit table are read digit by digit. Negative input yields an empty string.
+    """
+    if num == 0:
+        return _NUM_KO[0]
+    if num >= 10 ** 20:
+        # No unit name beyond 경 is available here; spell the digits instead.
+        return "".join(_NUM_KO[int(digit)] for digit in str(num))
+
+    def _below_ten_thousand(value: int) -> str:
+        # Reads 1..9999 using 천/백/십, dropping the "일" multiplier.
+        parts = []
+        for unit_value, unit_name in ((1000, "천"), (100, "백"), (10, "십")):
+            digit, value = divmod(value, unit_value)
+            if digit == 0:
+                continue
+            if digit > 1:
+                parts.append(_NUM_KO[digit])
+            parts.append(unit_name)
+        if value > 0:
+            parts.append(_NUM_KO[value])
+        return "".join(parts)
+
+    groups = []
+    for unit_name in ("", "만", "억", "조", "경"):
+        if num <= 0:
+            break
+        num, chunk = divmod(num, 10000)
+        if chunk == 0:
+            continue
+        reading = _below_ten_thousand(chunk)
+        if unit_name == "만" and chunk == 1:
+            reading = ""
+        groups.append(reading + unit_name)
+    return "".join(reversed(groups))
+
+
+def _replace_numbers(text: str) -> str:
+    """Replace digits in common Korean date/count expressions with readings.
+
+    Handles "NNNN년", "N월 N일", "N여" and "N명". Other digits are left as-is.
+    """
+    def _year(match: re.Match) -> str:
+        return f"{_sino_korean(int(match.group(1)))}년"
+
+    def _month_day(match: re.Match) -> str:
+        month_num = int(match.group(1))
+        # 6월 and 10월 are read irregularly as 유월 / 시월.
+        month = {6: "유", 10: "시"}.get(month_num) or _sino_korean(month_num)
+        day = _sino_korean(int(match.group(2)))
+        return f"{month}월 {day}일"
+
+    def _approx(match: re.Match) -> str:
+        return f"{_sino_korean(int(match.group(1)))}여"
+
+    def _count(match: re.Match) -> str:
+        return f"{_sino_korean(int(match.group(1)))}명"
+
+    # `\b` is Unicode-aware for str patterns and treats Hangul as word
+    # characters, so "2026년에" or "약30명" would never match. Use explicit
+    # lookarounds that only reject adjacent digits / ASCII identifiers.
+    head = r"(?<![\dA-Za-z_])"
+    tail = r"(?![A-Za-z_])"
+    text = re.sub(head + r"(\d{4})\s*년" + tail, _year, text)
+    text = re.sub(head + r"(\d{1,2})\s*월\s*(\d{1,2})\s*일" + tail, _month_day, text)
+    text = re.sub(head + r"(\d+)\s*여" + tail, _approx, text)
+    text = re.sub(head + r"(\d+)\s*명" + tail, _count, text)
+    return text
+
+
+def _preprocess_text(text: str) -> str:
+    """Normalize text before synthesis.
+
+    Applies, in order: known English phrase replacements, Korean readings for
+    date/count numerals, letter-by-letter spelling of 2-6 letter uppercase
+    abbreviations, and replacement of parentheses with spaces.
+    """
+    # re.ASCII makes `\b` treat only ASCII letters/digits/underscore as word
+    # characters, so a phrase or abbreviation directly followed by a Korean
+    # particle ("ADF는", "Naver Blog에서") is still recognized.
+    # Normalize pronunciation of English phrases / brand names.
+    for src, dst in _PHRASE_MAP:
+        text = re.sub(
+            rf"\b{re.escape(src)}\b", dst, text, flags=re.IGNORECASE | re.ASCII
+        )
+    text = _replace_numbers(text)
+    text = re.sub(r"\b[A-Z]{2,6}\b", _spell_abbrev, text, flags=re.ASCII)
+    # Soften pauses caused by parentheses.
+    text = text.replace("(", " ").replace(")", " ")
+    return text
+
+
+def text_to_mp3(text: str, mp3_path: str, voice: Optional[str] = None) -> None:
    if not text:
        raise RuntimeError("텍스트가 비어 있습니다.")

+    text = _preprocess_text(text)
+
    mp3_target = Path(mp3_path)
    mp3_target.parent.mkdir(parents=True, exist_ok=True)

    tts_engine = os.getenv("TTS_ENGINE", "pyttsx3").strip().lower()
+    voice = (voice or "").strip().lower() or None
    wav_fd, wav_path = tempfile.mkstemp(suffix=".wav")
    os.close(wav_fd)

    try:
        if tts_engine == "mms":
            _text_to_wav_mms(text, wav_path)
-            audio_filter = "loudnorm=I=-16:LRA=11:TP=-1.5"
+            audio_filter = "highpass=f=80,lowpass=f=12000"
        else:
            engine = pyttsx3.init()
            # 음질 개선: 속도/볼륨 조정 및 한국어 음성 우선 선택
@@ -97,7 +245,7 @@ def text_to_mp3(text: str, mp3_path: str) -> None:
                engine.setProperty("volume", 1.0)
            except Exception:
                pass
-            _select_korean_voice(engine)
+            _select_korean_voice(engine, prefer_female=voice == "female")
            # pyttsx3로 wav 생성 후 ffmpeg로 mp3 변환
            engine.save_to_file(text, wav_path)
            engine.runAndWait()
Author	SHA1	Message	Date
dsyoon	de9015c00d	Update voice option labels Rename voice selector labels in the client template.	2026-01-30 21:37:27 +09:00
dsyoon	21a29a6c8a	Add voice selection control Expose a voice selector next to the save button and pass the choice to TTS so pyttsx3 can prefer a female voice.	2026-01-30 21:32:20 +09:00
dsyoon	ebd6a574d4	Update page title Refresh the HTML title text for the client page.	2026-01-30 20:59:41 +09:00
dsyoon	0601488087	Make list selection explicit Stop auto-downloading on list click and highlight the selected item; download only when the button is pressed.	2026-01-30 20:55:50 +09:00
dsyoon	1d92f2f4fa	Store mp3 size in database Persist file size at creation and backfill missing sizes on list responses so the UI can display sizes reliably.	2026-01-30 20:51:18 +09:00
dsyoon	7ee89d0629	Show file sizes in list Include mp3 size display in API responses and render it in the list; remove obsolete Apache config.	2026-01-30 20:47:52 +09:00
dsyoon	e240153e3f	Handle Korean numeric dates in TTS Convert common date and count numerals to Korean readings so MMS outputs month/day and attendee counts correctly.	2026-01-30 20:42:36 +09:00
dsyoon	35dae473ec	Apply uroman preprocessing for MMS Romanize Korean input for MMS, guard empty tokens, and add uroman dependency.	2026-01-30 20:32:24 +09:00
dsyoon	9b3a743c52	Reduce MMS audio distortion Write MMS wav output as PCM16, simplify filters, and normalize punctuation to avoid garbled speech.	2026-01-30 20:24:44 +09:00
dsyoon	8cdfa1bf4f	Normalize English terms for Korean TTS Preprocess text to spell abbreviations and map common English phrases to Korean pronunciation.	2026-01-30 20:17:17 +09:00