Files
tts/server/tts_service.py
dsyoon 6b40d23c7e Improve delete UX and TTS audio quality
Clear text on delete and enhance TTS output with Korean voice selection and ffmpeg audio normalization.
2026-01-30 15:26:09 +09:00

88 lines
2.4 KiB
Python

import os
import subprocess
import tempfile
from pathlib import Path
import pyttsx3
def _is_korean_voice(voice: object) -> bool:
    """Return True when a voice's languages, id or name identify it as Korean."""
    languages = getattr(voice, "languages", None) or []
    # A bare str/bytes tag must stay one value; iterating it would split it
    # into single characters (or ints) and the tag could never be matched.
    if isinstance(languages, (str, bytes)):
        languages = [languages]

    descriptors = []
    for value in (*languages, getattr(voice, "id", None), getattr(voice, "name", None)):
        if not value:
            continue
        if isinstance(value, bytes):
            # Language tags may arrive as raw bytes; decode them instead of
            # matching against their repr().
            value = value.decode("utf-8", errors="ignore")
        descriptors.append(str(value))
    joined = " ".join(descriptors).lower()

    if "korean" in joined:
        return True
    # Match "ko" only as a whole alphabetic token (e.g. "ko_KR", "KO-KR", "ko").
    # A plain substring test would also accept unrelated voices such as
    # "Kyoko" or the language tag "kok".
    tokens = "".join(ch if ch.isalpha() else " " for ch in joined).split()
    return "ko" in tokens


def _select_korean_voice(engine: "pyttsx3.Engine") -> None:
    """Switch *engine* to the first installed voice that looks Korean.

    This is best effort: if the voice list cannot be read, no Korean voice is
    found, or the engine rejects every candidate, the engine keeps its
    current voice and no exception is raised.

    Args:
        engine: Engine whose "voice" property should be updated.
    """
    try:
        voices = engine.getProperty("voices") or []
    except Exception:
        # Voice enumeration failed; keep whatever voice is already active.
        return

    for voice in voices:
        if not _is_korean_voice(voice):
            continue
        try:
            engine.setProperty("voice", voice.id)
        except Exception:
            # This candidate could not be applied; try the next match.
            continue
        return
def text_to_mp3(text: str, mp3_path: str) -> None:
    """Synthesize *text* with pyttsx3 and encode the result as an MP3 file.

    The speech is rendered to a temporary WAV file, which ffmpeg then converts
    to stereo 44.1 kHz audio at 192 kbit/s with the ``loudnorm`` filter
    applied. The temporary WAV file is removed whether or not the conversion
    succeeds.

    Args:
        text: Text to speak. Must contain at least one non-whitespace character.
        mp3_path: Destination file path. Missing parent directories are created.

    Raises:
        RuntimeError: If the text is empty or blank, the speech engine
            produced no audio, ffmpeg is missing or fails, the output file
            was not created, or another OS-level error occurs during
            synthesis or conversion.
        OSError: If the output directory or the temporary file cannot be
            created (these steps run before the wrapped section).
    """
    # Reject blank input too: whitespace-only text has nothing to speak.
    if not text or not text.strip():
        raise RuntimeError("텍스트가 비어 있습니다.")

    mp3_target = Path(mp3_path)
    mp3_target.parent.mkdir(parents=True, exist_ok=True)

    engine = pyttsx3.init()
    # Audio quality tweaks: adjust rate/volume and prefer a Korean voice.
    # Deliberately best effort - a driver that rejects these properties should
    # not prevent synthesis with its defaults.
    try:
        engine.setProperty("rate", 170)
        engine.setProperty("volume", 1.0)
    except Exception:
        pass
    _select_korean_voice(engine)

    wav_fd, wav_path = tempfile.mkstemp(suffix=".wav")
    os.close(wav_fd)  # Only the path is needed; the engine writes the file itself.
    try:
        # Render a WAV with pyttsx3, then convert it to MP3 with ffmpeg.
        engine.save_to_file(text, wav_path)
        engine.runAndWait()

        # mkstemp pre-creates a zero-byte file, so an empty file means the
        # engine wrote nothing. Report that directly instead of letting it
        # surface as an opaque ffmpeg failure.
        if os.path.getsize(wav_path) == 0:
            raise RuntimeError("음성 합성에 실패했습니다.")

        try:
            subprocess.run(
                [
                    "ffmpeg",
                    "-nostdin",  # Never read from the server's stdin.
                    "-hide_banner",
                    "-loglevel",
                    "error",
                    "-y",
                    "-i",
                    wav_path,
                    "-ac",
                    "2",
                    "-ar",
                    "44100",
                    "-b:a",
                    "192k",
                    "-af",
                    "loudnorm=I=-16:LRA=11:TP=-1.5",
                    str(mp3_target),
                ],
                check=True,
                stdin=subprocess.DEVNULL,
                stdout=subprocess.DEVNULL,
                # Captured so the chained CalledProcessError carries ffmpeg's
                # diagnostics in its ``stderr`` attribute.
                stderr=subprocess.PIPE,
            )
        except FileNotFoundError as exc:
            # The ffmpeg binary is not on PATH; the generic OSError message
            # below would wrongly point at the output path.
            raise RuntimeError("ffmpeg 실행 파일을 찾을 수 없습니다.") from exc

        if not mp3_target.exists():
            raise RuntimeError("mp3 파일 생성에 실패했습니다.")
    except subprocess.CalledProcessError as exc:
        raise RuntimeError("ffmpeg 변환에 실패했습니다.") from exc
    except OSError as exc:
        raise RuntimeError("파일 생성 권한 또는 경로 오류입니다.") from exc
    finally:
        # Always discard the intermediate WAV; ignore it if it is already gone.
        try:
            os.remove(wav_path)
        except OSError:
            pass