Apply uroman preprocessing for MMS

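Romanize the input text with uroman before MMS tokenization (the original text is kept if uroman is unavailable or fails), raise an error when the text or the tokenizer output is empty, and add the uroman dependency.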
This commit is contained in:
dsyoon
2026-01-30 20:32:24 +09:00
parent 9b3a743c52
commit 35dae473ec
2 changed files with 16 additions and 0 deletions

View File

@@ -7,3 +7,4 @@ jinja2
torch
transformers
soundfile
uroman

View File

@@ -75,7 +75,22 @@ def _text_to_wav_mms(text: str, wav_path: str) -> None:
raise RuntimeError("MMS TTS 사용을 위해 soundfile 설치가 필요합니다.") from exc
model, tokenizer = _get_mms()
text = text.strip()
if not text:
raise RuntimeError("MMS 입력 텍스트가 비어 있습니다.")
# 한국어 입력은 uroman 전처리가 필요할 수 있음
try:
from uroman import uroman
text = uroman(text)
except Exception:
pass
inputs = tokenizer(text, return_tensors="pt")
if inputs["input_ids"].shape[1] == 0:
raise RuntimeError("MMS 토크나이저 입력이 비어 있습니다.")
with torch.no_grad():
audio = model(**inputs).waveform.squeeze().cpu().numpy()