Improve delete UX and TTS audio quality

Clear text on delete and enhance TTS output with Korean voice selection and ffmpeg audio normalization.
2026-01-30 15:26:09 +09:00
parent 373299e0cb
commit 6b40d23c7e
2 changed files with 49 additions and 1 deletions
--- a/server/tts_service.py
+++ b/server/tts_service.py
@@ -6,6 +6,30 @@ from pathlib import Path
 import pyttsx3


+def _select_korean_voice(engine: pyttsx3.Engine) -> None:
+    """Best-effort: switch *engine* to the first usable Korean voice, if any."""
+    import re  # function-scope import; this patch does not touch the file's imports
+
+    # "ko" must be a standalone token (ko, ko_KR, ko-KR): a bare substring test
+    # also selects unrelated voices such as the Japanese "Kyoko".
+    korean = re.compile(r"(?<![a-z])ko(?![a-z])|korean")
+    try:
+        voices = engine.getProperty("voices") or []
+    except Exception:  # driver cannot enumerate voices: keep its default voice
+        return
+    for voice in voices:
+        raw = [*(getattr(voice, "languages", None) or ()), getattr(voice, "id", None), getattr(voice, "name", None)]
+        # Decode bytes entries instead of matching against their repr().
+        text = " ".join(v.decode("utf-8", "ignore") if isinstance(v, bytes) else str(v) for v in raw if v)
+        if not korean.search(text.lower()):
+            continue
+        try:
+            engine.setProperty("voice", voice.id)
+            return
+        except Exception:  # this voice could not be applied: try the next match
+            continue
+
+
 def text_to_mp3(text: str, mp3_path: str) -> None:
    if not text:
        raise RuntimeError("텍스트가 비어 있습니다.")
@@ -14,6 +38,13 @@ def text_to_mp3(text: str, mp3_path: str) -> None:
    mp3_target.parent.mkdir(parents=True, exist_ok=True)

    engine = pyttsx3.init()
+    # 음질 개선: 속도/볼륨 조정 및 한국어 음성 우선 선택
+    try:
+        engine.setProperty("rate", 170)
+        engine.setProperty("volume", 1.0)
+    except Exception:
+        pass
+    _select_korean_voice(engine)
    wav_fd, wav_path = tempfile.mkstemp(suffix=".wav")
    os.close(wav_fd)

@@ -23,7 +54,21 @@ def text_to_mp3(text: str, mp3_path: str) -> None:
        engine.runAndWait()

        subprocess.run(
-            ["ffmpeg", "-y", "-i", wav_path, str(mp3_target)],
+            [
+                "ffmpeg",
+                "-y",
+                "-i",
+                wav_path,
+                "-ac",
+                "2",
+                "-ar",
+                "44100",
+                "-b:a",
+                "192k",
+                "-af",
+                "loudnorm=I=-16:LRA=11:TP=-1.5",
+                str(mp3_target),
+            ],
            check=True,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,