Apply uroman preprocessing for MMS

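Romanize input text with uroman before MMS tokenization (best-effort: any uroman import or call failure is silently ignored and the original text is used), reject empty input text and empty tokenizer output with a RuntimeError, and add the uroman dependency.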
2026-01-30 20:32:24 +09:00
parent 9b3a743c52
commit 35dae473ec
2 changed files with 16 additions and 0 deletions
--- a/requirements.txt
+++ b/requirements.txt
@@ -7,3 +7,4 @@ jinja2
 torch
 transformers
 soundfile
+uroman
--- a/server/tts_service.py
+++ b/server/tts_service.py
@@ -75,7 +75,22 @@ def _text_to_wav_mms(text: str, wav_path: str) -> None:
        raise RuntimeError("MMS TTS 사용을 위해 soundfile 설치가 필요합니다.") from exc

    model, tokenizer = _get_mms()
+
+    text = text.strip()
+    if not text:
+        raise RuntimeError("MMS 입력 텍스트가 비어 있습니다.")
+
+    # 한국어 입력은 uroman 전처리가 필요할 수 있음
+    try:
+        from uroman import uroman
+
+        text = uroman(text)
+    except Exception:
+        pass
+
    inputs = tokenizer(text, return_tensors="pt")
+    if inputs["input_ids"].shape[1] == 0:
+        raise RuntimeError("MMS 토크나이저 입력이 비어 있습니다.")
    with torch.no_grad():
        audio = model(**inputs).waveform.squeeze().cpu().numpy()