diff --git a/server/tts_service.py b/server/tts_service.py index 450a4f2..d28f709 100644 --- a/server/tts_service.py +++ b/server/tts_service.py @@ -80,7 +80,8 @@ def _text_to_wav_mms(text: str, wav_path: str) -> None: audio = model(**inputs).waveform.squeeze().cpu().numpy() sample_rate = getattr(model.config, "sampling_rate", 22050) - sf.write(wav_path, audio, sample_rate) + # MMS 출력은 float이므로 PCM16으로 저장해 왜곡을 줄입니다. + sf.write(wav_path, audio, sample_rate, subtype="PCM_16") def _select_korean_voice(engine: pyttsx3.Engine) -> None: @@ -116,6 +117,8 @@ def _preprocess_text(text: str) -> str: for src, dst in _PHRASE_MAP: text = re.sub(rf"\b{re.escape(src)}\b", dst, text, flags=re.IGNORECASE) text = re.sub(r"\b[A-Z]{2,6}\b", _spell_abbrev, text) + # 괄호/구두점으로 인한 끊김을 완화 + text = text.replace("(", " ").replace(")", " ") return text @@ -135,7 +138,7 @@ def text_to_mp3(text: str, mp3_path: str) -> None: try: if tts_engine == "mms": _text_to_wav_mms(text, wav_path) - audio_filter = "loudnorm=I=-16:LRA=11:TP=-1.5" + audio_filter = "highpass=f=80,lowpass=f=12000" else: engine = pyttsx3.init() # 음질 개선: 속도/볼륨 조정 및 한국어 음성 우선 선택