Reduce MMS audio distortion

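Write MMS WAV output as PCM16, simplify the ffmpeg audio filters (replace loudnorm with a highpass/lowpass chain), and normalize punctuation (e.g. replace parentheses with spaces) to avoid garbled speech.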
This commit is contained in:
dsyoon
2026-01-30 20:24:44 +09:00
parent 8cdfa1bf4f
commit 9b3a743c52

View File

@@ -80,7 +80,8 @@ def _text_to_wav_mms(text: str, wav_path: str) -> None:
audio = model(**inputs).waveform.squeeze().cpu().numpy()
sample_rate = getattr(model.config, "sampling_rate", 22050)
-sf.write(wav_path, audio, sample_rate)
+# MMS 출력은 float이므로 PCM16으로 저장해 왜곡을 줄입니다.
+sf.write(wav_path, audio, sample_rate, subtype="PCM_16")
def _select_korean_voice(engine: pyttsx3.Engine) -> None:
@@ -116,6 +117,8 @@ def _preprocess_text(text: str) -> str:
for src, dst in _PHRASE_MAP:
text = re.sub(rf"\b{re.escape(src)}\b", dst, text, flags=re.IGNORECASE)
text = re.sub(r"\b[A-Z]{2,6}\b", _spell_abbrev, text)
+# 괄호/구두점으로 인한 끊김을 완화
+text = text.replace("(", " ").replace(")", " ")
return text
@@ -135,7 +138,7 @@ def text_to_mp3(text: str, mp3_path: str) -> None:
try:
if tts_engine == "mms":
_text_to_wav_mms(text, wav_path)
-audio_filter = "loudnorm=I=-16:LRA=11:TP=-1.5"
+audio_filter = "highpass=f=80,lowpass=f=12000"
else:
engine = pyttsx3.init()
# 음질 개선: 속도/볼륨 조정 및 한국어 음성 우선 선택