Reduce MMS audio distortion
Write the MMS WAV output as 16-bit PCM, simplify the audio filters (a high-pass/low-pass chain instead of loudnorm), and replace parentheses with spaces to avoid garbled speech.
This commit is contained in:
@@ -80,7 +80,8 @@ def _text_to_wav_mms(text: str, wav_path: str) -> None:
|
||||
audio = model(**inputs).waveform.squeeze().cpu().numpy()
|
||||
|
||||
sample_rate = getattr(model.config, "sampling_rate", 22050)
|
||||
sf.write(wav_path, audio, sample_rate)
|
||||
# MMS 출력은 float이므로 PCM16으로 저장해 왜곡을 줄입니다.
|
||||
sf.write(wav_path, audio, sample_rate, subtype="PCM_16")
|
||||
|
||||
|
||||
def _select_korean_voice(engine: pyttsx3.Engine) -> None:
|
||||
@@ -116,6 +117,8 @@ def _preprocess_text(text: str) -> str:
|
||||
for src, dst in _PHRASE_MAP:
|
||||
text = re.sub(rf"\b{re.escape(src)}\b", dst, text, flags=re.IGNORECASE)
|
||||
text = re.sub(r"\b[A-Z]{2,6}\b", _spell_abbrev, text)
|
||||
# 괄호/구두점으로 인한 끊김을 완화
|
||||
text = text.replace("(", " ").replace(")", " ")
|
||||
return text
|
||||
|
||||
|
||||
@@ -135,7 +138,7 @@ def text_to_mp3(text: str, mp3_path: str) -> None:
|
||||
try:
|
||||
if tts_engine == "mms":
|
||||
_text_to_wav_mms(text, wav_path)
|
||||
audio_filter = "loudnorm=I=-16:LRA=11:TP=-1.5"
|
||||
audio_filter = "highpass=f=80,lowpass=f=12000"
|
||||
else:
|
||||
engine = pyttsx3.init()
|
||||
# 음질 개선: 속도/볼륨 조정 및 한국어 음성 우선 선택
|
||||
|
||||
Reference in New Issue
Block a user