Add voice selection control

Expose a voice selector next to the save button and pass the choice to TTS so pyttsx3 can prefer a female voice.
2026-01-30 21:32:20 +09:00
parent ebd6a574d4
commit 21a29a6c8a
5 changed files with 60 additions and 16 deletions
--- a/server/main.py
+++ b/server/main.py
@@ -48,6 +48,7 @@ templates = Jinja2Templates(directory=str(CLIENT_DIR / "templates"))

 class TtsCreateRequest(BaseModel):
    text: str
+    voice: str | None = None


 class TtsDeleteRequest(BaseModel):
@@ -125,6 +126,7 @@ def api_list_tts():
@app.post("/api/tts")
 def api_create_tts(payload: TtsCreateRequest):
    text = (payload.text or "").strip()
+    voice = (payload.voice or "").strip().lower()
    if len(text) < 11:
        raise HTTPException(status_code=400, detail="텍스트는 11글자 이상이어야 합니다.")

@@ -137,7 +139,7 @@ def api_create_tts(payload: TtsCreateRequest):
    mp3_path = RESOURCES_DIR / filename

    try:
-        text_to_mp3(text=text, mp3_path=str(mp3_path))
+        text_to_mp3(text=text, mp3_path=str(mp3_path), voice=voice)
    except Exception as exc:
        logger.exception("TTS 생성 실패")
        delete_item_by_id(tts_id)
--- a/server/tts_service.py
+++ b/server/tts_service.py
@@ -111,23 +111,41 @@ def _text_to_wav_mms(text: str, wav_path: str) -> None:
    sf.write(wav_path, audio, sample_rate, subtype="PCM_16")


-def _select_korean_voice(engine: pyttsx3.Engine) -> None:
+def _select_korean_voice(engine: pyttsx3.Engine, prefer_female: bool = False) -> None:
    try:
        voices = engine.getProperty("voices") or []
    except Exception:
        return

-    for voice in voices:
-        lang_values = []
-        if getattr(voice, "languages", None):
-            lang_values.extend(voice.languages)
-        if getattr(voice, "id", None):
-            lang_values.append(voice.id)
-        if getattr(voice, "name", None):
-            lang_values.append(voice.name)
+    def _voice_info(v):
+        values = []
+        if getattr(v, "languages", None):
+            values.extend(v.languages)
+        if getattr(v, "id", None):
+            values.append(v.id)
+        if getattr(v, "name", None):
+            values.append(v.name)
+        return " ".join(str(x) for x in values).lower()

-        joined = " ".join(str(v) for v in lang_values).lower()
-        if "ko" in joined or "korean" in joined:
+    def _is_korean(info: str) -> bool:
+        return "ko" in info or "korean" in info
+
+    def _is_female(info: str) -> bool:
+        return any(token in info for token in ["female", "woman", "girl", "여성", "여자"])
+
+    if prefer_female:
+        for voice in voices:
+            info = _voice_info(voice)
+            if _is_korean(info) and _is_female(info):
+                try:
+                    engine.setProperty("voice", voice.id)
+                    return
+                except Exception:
+                    continue
+
+    for voice in voices:
+        info = _voice_info(voice)
+        if _is_korean(info):
            try:
                engine.setProperty("voice", voice.id)
                return
@@ -200,7 +218,7 @@ def _preprocess_text(text: str) -> str:
    return text


-def text_to_mp3(text: str, mp3_path: str) -> None:
+def text_to_mp3(text: str, mp3_path: str, voice: Optional[str] = None) -> None:
    if not text:
        raise RuntimeError("텍스트가 비어 있습니다.")

@@ -210,6 +228,7 @@ def text_to_mp3(text: str, mp3_path: str) -> None:
    mp3_target.parent.mkdir(parents=True, exist_ok=True)

    tts_engine = os.getenv("TTS_ENGINE", "pyttsx3").strip().lower()
+    voice = (voice or "").strip().lower() or None
    wav_fd, wav_path = tempfile.mkstemp(suffix=".wav")
    os.close(wav_fd)

@@ -226,7 +245,7 @@ def text_to_mp3(text: str, mp3_path: str) -> None:
                engine.setProperty("volume", 1.0)
            except Exception:
                pass
-            _select_korean_voice(engine)
+            _select_korean_voice(engine, prefer_female=voice == "female")
            # pyttsx3로 wav 생성 후 ffmpeg로 mp3 변환
            engine.save_to_file(text, wav_path)
            engine.runAndWait()