Add voice selection control

Expose a voice selector next to the save button and pass the choice to TTS so pyttsx3 can prefer a female voice.
This commit is contained in:
dsyoon
2026-01-30 21:32:20 +09:00
parent ebd6a574d4
commit 21a29a6c8a
5 changed files with 60 additions and 16 deletions

View File

@@ -48,6 +48,7 @@ templates = Jinja2Templates(directory=str(CLIENT_DIR / "templates"))
class TtsCreateRequest(BaseModel):
text: str
voice: str | None = None
class TtsDeleteRequest(BaseModel):
@@ -125,6 +126,7 @@ def api_list_tts():
@app.post("/api/tts")
def api_create_tts(payload: TtsCreateRequest):
text = (payload.text or "").strip()
voice = (payload.voice or "").strip().lower()
if len(text) < 11:
raise HTTPException(status_code=400, detail="텍스트는 11글자 이상이어야 합니다.")
@@ -137,7 +139,7 @@ def api_create_tts(payload: TtsCreateRequest):
mp3_path = RESOURCES_DIR / filename
try:
text_to_mp3(text=text, mp3_path=str(mp3_path))
text_to_mp3(text=text, mp3_path=str(mp3_path), voice=voice)
except Exception as exc:
logger.exception("TTS 생성 실패")
delete_item_by_id(tts_id)

View File

@@ -111,23 +111,41 @@ def _text_to_wav_mms(text: str, wav_path: str) -> None:
sf.write(wav_path, audio, sample_rate, subtype="PCM_16")
def _select_korean_voice(engine: pyttsx3.Engine) -> None:
def _select_korean_voice(engine: pyttsx3.Engine, prefer_female: bool = False) -> None:
try:
voices = engine.getProperty("voices") or []
except Exception:
return
for voice in voices:
lang_values = []
if getattr(voice, "languages", None):
lang_values.extend(voice.languages)
if getattr(voice, "id", None):
lang_values.append(voice.id)
if getattr(voice, "name", None):
lang_values.append(voice.name)
def _voice_info(v):
values = []
if getattr(v, "languages", None):
values.extend(v.languages)
if getattr(v, "id", None):
values.append(v.id)
if getattr(v, "name", None):
values.append(v.name)
return " ".join(str(x) for x in values).lower()
joined = " ".join(str(v) for v in lang_values).lower()
if "ko" in joined or "korean" in joined:
def _is_korean(info: str) -> bool:
return "ko" in info or "korean" in info
def _is_female(info: str) -> bool:
return any(token in info for token in ["female", "woman", "girl", "여성", "여자"])
if prefer_female:
for voice in voices:
info = _voice_info(voice)
if _is_korean(info) and _is_female(info):
try:
engine.setProperty("voice", voice.id)
return
except Exception:
continue
for voice in voices:
info = _voice_info(voice)
if _is_korean(info):
try:
engine.setProperty("voice", voice.id)
return
@@ -200,7 +218,7 @@ def _preprocess_text(text: str) -> str:
return text
def text_to_mp3(text: str, mp3_path: str) -> None:
def text_to_mp3(text: str, mp3_path: str, voice: Optional[str] = None) -> None:
if not text:
raise RuntimeError("텍스트가 비어 있습니다.")
@@ -210,6 +228,7 @@ def text_to_mp3(text: str, mp3_path: str) -> None:
mp3_target.parent.mkdir(parents=True, exist_ok=True)
tts_engine = os.getenv("TTS_ENGINE", "pyttsx3").strip().lower()
voice = (voice or "").strip().lower() or None
wav_fd, wav_path = tempfile.mkstemp(suffix=".wav")
os.close(wav_fd)
@@ -226,7 +245,7 @@ def text_to_mp3(text: str, mp3_path: str) -> None:
engine.setProperty("volume", 1.0)
except Exception:
pass
_select_korean_voice(engine)
_select_korean_voice(engine, prefer_female=voice == "female")
# pyttsx3로 wav 생성 후 ffmpeg로 mp3 변환
engine.save_to_file(text, wav_path)
engine.runAndWait()