Reduce MMS audio distortion

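Write the MMS WAV output explicitly as PCM16, replace the loudnorm filter with a simple 80 Hz high-pass / 12 kHz low-pass pair, and replace parentheses with spaces during text preprocessing to avoid garbled speech.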
2026-01-30 20:24:44 +09:00
parent 8cdfa1bf4f
commit 9b3a743c52
1 changed files with 5 additions and 2 deletions
--- a/server/tts_service.py
+++ b/server/tts_service.py
@@ -80,7 +80,8 @@ def _text_to_wav_mms(text: str, wav_path: str) -> None:
        audio = model(**inputs).waveform.squeeze().cpu().numpy()

    sample_rate = getattr(model.config, "sampling_rate", 22050)
-    sf.write(wav_path, audio, sample_rate)
+    # MMS 출력은 float이므로 PCM16으로 저장해 왜곡을 줄입니다.
+    sf.write(wav_path, audio, sample_rate, subtype="PCM_16")


 def _select_korean_voice(engine: pyttsx3.Engine) -> None:
@@ -116,6 +117,8 @@ def _preprocess_text(text: str) -> str:
    for src, dst in _PHRASE_MAP:
        text = re.sub(rf"\b{re.escape(src)}\b", dst, text, flags=re.IGNORECASE)
    text = re.sub(r"\b[A-Z]{2,6}\b", _spell_abbrev, text)
+    # 괄호/구두점으로 인한 끊김을 완화
+    text = text.replace("(", " ").replace(")", " ")
    return text


@@ -135,7 +138,7 @@ def text_to_mp3(text: str, mp3_path: str) -> None:
    try:
        if tts_engine == "mms":
            _text_to_wav_mms(text, wav_path)
-            audio_filter = "loudnorm=I=-16:LRA=11:TP=-1.5"
+            audio_filter = "highpass=f=80,lowpass=f=12000"
        else:
            engine = pyttsx3.init()
            # 음질 개선: 속도/볼륨 조정 및 한국어 음성 우선 선택